-rw-r--r--  LICENSE  674
-rw-r--r--  README.md  462
-rw-r--r--  README_RU.md  471
-rw-r--r--  __init__.py  39
-rw-r--r--  install.bat  37
-rw-r--r--  install.py  104
-rw-r--r--  nodes.py  1237
-rw-r--r--  pyproject.toml.off  15
-rw-r--r--  r_chainner/archs/face/gfpganv1_clean_arch.py  370
-rw-r--r--  r_chainner/archs/face/stylegan2_clean_arch.py  453
-rw-r--r--  r_chainner/model_loading.py  28
-rw-r--r--  r_chainner/types.py  18
-rw-r--r--  r_facelib/__init__.py  0
-rw-r--r--  r_facelib/detection/__init__.py  102
-rw-r--r--  r_facelib/detection/align_trans.py  219
-rw-r--r--  r_facelib/detection/matlab_cp2tform.py  317
-rw-r--r--  r_facelib/detection/retinaface/retinaface.py  389
-rw-r--r--  r_facelib/detection/retinaface/retinaface_net.py  196
-rw-r--r--  r_facelib/detection/retinaface/retinaface_utils.py  421
-rw-r--r--  r_facelib/detection/yolov5face/__init__.py  0
-rw-r--r--  r_facelib/detection/yolov5face/face_detector.py  141
-rw-r--r--  r_facelib/detection/yolov5face/models/__init__.py  0
-rw-r--r--  r_facelib/detection/yolov5face/models/common.py  299
-rw-r--r--  r_facelib/detection/yolov5face/models/experimental.py  45
-rw-r--r--  r_facelib/detection/yolov5face/models/yolo.py  235
-rw-r--r--  r_facelib/detection/yolov5face/models/yolov5l.yaml  47
-rw-r--r--  r_facelib/detection/yolov5face/models/yolov5n.yaml  45
-rw-r--r--  r_facelib/detection/yolov5face/utils/__init__.py  0
-rw-r--r--  r_facelib/detection/yolov5face/utils/autoanchor.py  12
-rw-r--r--  r_facelib/detection/yolov5face/utils/datasets.py  35
-rw-r--r--  r_facelib/detection/yolov5face/utils/extract_ckpt.py  5
-rw-r--r--  r_facelib/detection/yolov5face/utils/general.py  271
-rw-r--r--  r_facelib/detection/yolov5face/utils/torch_utils.py  40
-rw-r--r--  r_facelib/parsing/__init__.py  23
-rw-r--r--  r_facelib/parsing/bisenet.py  140
-rw-r--r--  r_facelib/parsing/parsenet.py  194
-rw-r--r--  r_facelib/parsing/resnet.py  69
-rw-r--r--  r_facelib/utils/__init__.py  7
-rw-r--r--  r_facelib/utils/face_restoration_helper.py  455
-rw-r--r--  r_facelib/utils/face_utils.py  248
-rw-r--r--  r_facelib/utils/misc.py  143
-rw-r--r--  reactor_patcher.py  135
-rw-r--r--  reactor_utils.py  231
-rw-r--r--  requirements.txt  7
-rw-r--r--  scripts/__init__.py  0
-rw-r--r--  scripts/r_archs/__init__.py  0
-rw-r--r--  scripts/r_archs/codeformer_arch.py  278
-rw-r--r--  scripts/r_archs/vqgan_arch.py  437
-rw-r--r--  scripts/r_faceboost/__init__.py  0
-rw-r--r--  scripts/r_faceboost/restorer.py  130
-rw-r--r--  scripts/r_faceboost/swapper.py  42
-rw-r--r--  scripts/r_masking/__init__.py  0
-rw-r--r--  scripts/r_masking/core.py  647
-rw-r--r--  scripts/r_masking/segs.py  22
-rw-r--r--  scripts/r_masking/subcore.py  117
-rw-r--r--  scripts/reactor_faceswap.py  185
-rw-r--r--  scripts/reactor_logger.py  47
-rw-r--r--  scripts/reactor_swapper.py  572
-rw-r--r--  scripts/reactor_version.py  13
59 files changed, 10869 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3877ae0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dd33e74
--- /dev/null
+++ b/README.md
@@ -0,0 +1,462 @@
+<div align="center">
+
+ <img src="https://github.com/Gourieff/Assets/raw/main/sd-webui-reactor/ReActor_logo_NEW_EN.png?raw=true" alt="logo" width="180px"/>
+
+ ![Version](https://img.shields.io/badge/node_version-0.5.2_alpha2-lightgreen?style=for-the-badge&labelColor=darkgreen)
+
+ <!--<sup>
+ <font color=brightred>
+
+ ## !!! [Important Update](#latestupdate) !!!<br>Don't forget to add the Node again in existing workflows
+
+ </font>
+ </sup>-->
+
+ <a href="https://boosty.to/artgourieff" target="_blank">
+ <img src="https://lovemet.ru/img/boosty.jpg" width="108" alt="Support Me on Boosty"/>
+ <br>
+ <sup>
+ Support This Project
+ </sup>
+ </a>
+
+ <hr>
+
+ [![Commit activity](https://img.shields.io/github/commit-activity/t/Gourieff/ComfyUI-ReActor/main?cacheSeconds=0)](https://github.com/Gourieff/ComfyUI-ReActor/commits/main)
+ ![Last commit](https://img.shields.io/github/last-commit/Gourieff/ComfyUI-ReActor/main?cacheSeconds=0)
+ [![Opened issues](https://img.shields.io/github/issues/Gourieff/ComfyUI-ReActor?color=red)](https://github.com/Gourieff/ComfyUI-ReActor/issues?cacheSeconds=0)
+ [![Closed issues](https://img.shields.io/github/issues-closed/Gourieff/ComfyUI-ReActor?color=green&cacheSeconds=0)](https://github.com/Gourieff/ComfyUI-ReActor/issues?q=is%3Aissue+is%3Aclosed)
+ ![License](https://img.shields.io/github/license/Gourieff/ComfyUI-ReActor)
+
+ English | [Русский](/README_RU.md)
+
+# ReActor Node for ComfyUI
+
+</div>
+
+### The Fast and Simple Face Swap Extension Node for ComfyUI, based on [ReActor](https://github.com/Gourieff/sd-webui-reactor) SD-WebUI Face Swap Extension
+
+> By using this Node you understand the [responsibility](#disclaimer)
+
+<div align="center">
+
+---
+[**What's new**](#latestupdate) | [**Installation**](#installation) | [**Usage**](#usage) | [**Troubleshooting**](#troubleshooting) | [**Updating**](#updating) | [**Disclaimer**](#disclaimer) | [**Credits**](#credits) | [**Note!**](#note)
+
+---
+
+</div>
+
+<div align="center">
+ <img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/demo.gif?raw=true" alt="demo" width="100%"/>
+</div>
+
+<a name="latestupdate">
+
+## What's new in the latest update
+
+### 0.5.2 <sub><sup>ALPHA1</sup></sub>
+
+- New node "Unload ReActor Models" - is useful for complex WFs when you need to free some VRAM utilized by ReActor
+- Support of ORT CoreML and ROCM EPs, just install onnxruntime version you need
+- Install script improvements to install latest versions of ORT-GPU
+
+<details>
+ <summary><a>Previous versions</a></summary>
+
+### 0.5.1
+
+- Support for GPEN 1024/2048 restoration models (available in the HF dataset: https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models)
+- ReActorFaceBoost Node - an attempt to improve the quality of swapped faces. The idea is to restore and scale the swapped face (according to the `face_size` parameter of the restoration model) BEFORE pasting it into the target image (via inswapper algorithms); more information is [here (PR#321)](https://github.com/Gourieff/comfyui-reactor-node/pull/321)
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.1-whatsnew-01.jpg?raw=true" alt="0.5.1-whatsnew-01" width="100%"/>
+
+[Full size demo preview](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.1-whatsnew-02.png)
+
+- Face models are now sorted alphabetically
+- A lot of fixes and improvements
+
+### [0.5.0 <sub><sup>BETA4</sup></sub>](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.5.0)
+
+- Spandrel lib support for GFPGAN
+
+### 0.5.0 <sub><sup>BETA3</sup></sub>
+
+- Fixes: "RAM issue", "No detection" for MaskingHelper
+
+### 0.5.0 <sub><sup>BETA2</sup></sub>
+
+- You can now build a blended face model from a batch of face models you already have: just add the "Make Face Model Batch" node to your workflow and connect several models via "Load Face Model"
+- Huge performance boost for the image analyzer module - a 10x speed-up! Working with videos is now a pleasure!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-05.png?raw=true" alt="0.5.0-whatsnew-05" width="100%"/>
+
+### 0.5.0 <sub><sup>BETA1</sup></sub>
+
+- SWAPPED_FACE output for the Masking Helper Node
+- FIX: The empty alpha channel of the Masking Helper IMAGE output (which caused errors with some nodes) was removed
+
+### 0.5.0 <sub><sup>ALPHA1</sup></sub>
+
+- The ReActorBuildFaceModel Node got a "face_model" output to provide a blended face model directly to the main Node:
+
+Basic workflow [💾](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/workflows/ReActor--Build-Blended-Face-Model--v2.json)
+
+- The Face Masking feature is now available: just add the "ReActorMaskHelper" Node to the workflow and connect it as shown below:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-01.jpg?raw=true" alt="0.5.0-whatsnew-01" width="100%"/>
+
+If you don't have the "face_yolov8m.pt" Ultralytics model - you can download it from the [Assets](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/detection/bbox/face_yolov8m.pt) and put it into the "ComfyUI\models\ultralytics\bbox" directory
+<br>
+As well as ["sam_vit_b_01ec64.pth"](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/sams/sam_vit_b_01ec64.pth) model - download (if you don't have it) and put it into the "ComfyUI\models\sams" directory;
+
+Use this Node to get the best results from the face-swapping process:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-02.jpg?raw=true" alt="0.5.0-whatsnew-02" width="100%"/>
+
+- ReActorImageDublicator Node - useful for those who create videos: it duplicates one image into several frames so they can be used with the VAE Encoder (e.g. live avatars):
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-03.jpg?raw=true" alt="0.5.0-whatsnew-03" width="100%"/>
+
+- ReActorFaceSwapOpt (a simplified version of the Main Node) + ReActorOptions Nodes to set additional options, such as the (new) separate "input/source faces order". Yes! You can now set the order of faces in the index the way you want ("large to small" is the default)!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-04.jpg?raw=true" alt="0.5.0-whatsnew-04" width="100%"/>
+
+- A small speed boost when analyzing target images (unfortunately it is still quite slow compared to swapping and restoring...)
+
+### [0.4.2](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.2)
+
+- GPEN-BFR-512 and RestoreFormer_Plus_Plus face restoration models support
+
+You can download models here: https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models
+<br>Put them into the `ComfyUI\models\facerestore_models` folder
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-04.jpg?raw=true" alt="0.4.2-whatsnew-04" width="100%"/>
+
+- By popular demand, you can now blend several images of people into one face model file and use it with the "Load Face Model" Node or in SD WebUI as well;
+
+Experiment and create new faces or blend faces of one person to gain better accuracy and likeness!
+
+Just add ImpactPack's "Make Image Batch" Node as the input to ReActor's node and load the images you want to blend into one model:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-01.jpg?raw=true" alt="0.4.2-whatsnew-01" width="100%"/>
+
+Result example (the new face was created from 4 faces of different actresses):
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-02.jpg?raw=true" alt="0.4.2-whatsnew-02" width="75%"/>
+
+Basic workflow [💾](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/workflows/ReActor--Build-Blended-Face-Model--v1.json)
+
+### [0.4.1](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.1)
+
+- CUDA 12 support - don't forget to run (Windows) `install.bat` or (Linux/MacOS) `install.py` with ComfyUI's Python environment, or try to install ORT-GPU for CUDA 12.x manually (https://onnxruntime.ai/docs/install/#install-onnx-runtime-gpu-cuda-12x)
+- Issue https://github.com/Gourieff/comfyui-reactor-node/issues/173 fix
+
+- A separate Node for Face Restoration postprocessing (FR https://github.com/Gourieff/comfyui-reactor-node/issues/191); it can be found inside ReActor's menu (RestoreFace Node)
+- (Windows) Installation can now be done with the Python found in the system PATH
+- Different fixes and improvements
+
+- Face Restore Visibility and CodeFormer Weight (Fidelity) options are now available! Don't forget to reload the Node in your existing workflow
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.1-whatsnew-01.jpg?raw=true" alt="0.4.1-whatsnew-01" width="100%"/>
+
+### [0.4.0](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.0)
+
+- Input "input_image" goes first now, it gives a correct bypass and also it is right to have the main input first;
+- You can now save face models as "safetensors" files (`ComfyUI\models\reactor\faces`) and load them into ReActor implementing different scenarios and keeping super lightweight face models of the faces you use:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-01.jpg?raw=true" alt="0.4.0-whatsnew-01" width="100%"/>
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-02.jpg?raw=true" alt="0.4.0-whatsnew-02" width="100%"/>
+
+- Ability to build and save face models directly from an image:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-03.jpg?raw=true" alt="0.4.0-whatsnew-03" width="50%"/>
+
+- Both inputs are optional; just connect one of them according to your workflow. If both are connected, `image` takes priority.
+- Various fixes making this extension better.
+
+Thanks to everyone who finds bugs, suggests new features and supports this project!
+
+</details>
+
+## Installation
+
+<details>
+ <summary>SD WebUI: <a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/">AUTOMATIC1111</a> or <a href="https://github.com/vladmandic/automatic">SD.Next</a></summary>
+
+1. Close (stop) your SD-WebUI/Comfy Server if it's running
+2. (For Windows Users):
+ - Install [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Community version - you need this step to build Insightface)
+ - OR only [VS C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) and select "Desktop Development with C++" under "Workloads -> Desktop & Mobile"
+   - OR if you don't want to install VS or VS C++ BT - follow [these steps (sec. I)](#insightfacebuild)
+3. Go to the `extensions\sd-webui-comfyui\ComfyUI\custom_nodes`
+4. Open Console or Terminal and run `git clone https://github.com/Gourieff/ComfyUI-ReActor`
+5. Go to the SD WebUI root folder, open Console or Terminal and run (Windows users) `.\venv\Scripts\activate` or (Linux/MacOS) `source venv/bin/activate`
+6. `python -m pip install -U pip`
+7. `cd extensions\sd-webui-comfyui\ComfyUI\custom_nodes\ComfyUI-ReActor`
+8. `python install.py`
+9. Please wait until the installation process is finished
+10. (Since version 0.3.0) Download the additional face restoration models from the link below and put them into the `extensions\sd-webui-comfyui\ComfyUI\models\facerestore_models` directory:<br>
+https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models
+11. Run SD WebUI and check the console for the message that the ReActor Node is running:
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/console_status_running.jpg?raw=true" alt="console_status_running" width="759"/>
+
+12. Go to the ComfyUI tab and find the ReActor Node inside the `ReActor` menu or by using the search:
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/webui-demo.png?raw=true" alt="webui-demo" width="100%"/>
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/search-demo.png?raw=true" alt="webui-demo" width="1043"/>
+
+</details>
+
+<details>
+ <summary>Standalone (Portable) <a href="https://github.com/comfyanonymous/ComfyUI">ComfyUI</a> for Windows</summary>
+
+1. Do the following:
+ - Install [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Community version - you need this step to build Insightface)
+ - OR only [VS C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) and select "Desktop Development with C++" under "Workloads -> Desktop & Mobile"
+   - OR if you don't want to install VS or VS C++ BT - follow [these steps (sec. I)](#insightfacebuild)
+2. Choose between two options:
+   - (ComfyUI Manager) Open ComfyUI Manager, click "Install Custom Nodes", type "ReActor" in the "Search" field and then click "Install". After ComfyUI completes the process, restart the server.
+ - (Manually) Go to `ComfyUI\custom_nodes`, open Console and run `git clone https://github.com/Gourieff/ComfyUI-ReActor`
+3. Go to `ComfyUI\custom_nodes\ComfyUI-ReActor` and run `install.bat`
+4. If you don't have the "face_yolov8m.pt" Ultralytics model - you can download it from the [Assets](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/detection/bbox/face_yolov8m.pt) and put it into the "ComfyUI\models\ultralytics\bbox" directory
+<br>
+Do the same for one or both of the "SAM" models from [here](https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/sams) - download them (if you don't have them) and put them into the "ComfyUI\models\sams" directory
+5. Run ComfyUI and find the ReActor Nodes inside the `ReActor` menu or by using the search
+
+</details>
+
+## Usage
+
+You can find the ReActor Nodes inside the `ReActor` menu or by using the search (just type "ReActor" in the search field)
+
+List of Nodes:
+- ••• Main Nodes •••
+ - ReActorFaceSwap (Main Node)
+ - ReActorFaceSwapOpt (Main Node with the additional Options input)
+ - ReActorOptions (Options for ReActorFaceSwapOpt)
+ - ReActorFaceBoost (Face Booster Node)
+ - ReActorMaskHelper (Masking Helper)
+- ••• Operations with Face Models •••
+ - ReActorSaveFaceModel (Save Face Model)
+ - ReActorLoadFaceModel (Load Face Model)
+ - ReActorBuildFaceModel (Build Blended Face Model)
+ - ReActorMakeFaceModelBatch (Make Face Model Batch)
+- ••• Additional Nodes •••
+ - ReActorRestoreFace (Face Restoration)
+  - ReActorImageDublicator (Duplicate one Image into an Images List)
+ - ImageRGBA2RGB (Convert RGBA to RGB)
+
+Connect all required slots and queue the prompt.
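+
+If you prefer to queue a workflow from a script instead of the web UI, below is a minimal sketch using ComfyUI's HTTP API (nothing ReActor-specific is required). It assumes the default server address `127.0.0.1:8188` and a workflow you exported via "Save (API Format)" to a hypothetical file `reactor_workflow_api.json`:
+
+```python
+# Minimal sketch: queue a workflow exported in ComfyUI's API format
+# against a locally running ComfyUI server (default address assumed).
+import json
+from urllib import request
+
+with open("reactor_workflow_api.json", "r", encoding="utf-8") as f:
+    workflow = json.load(f)  # hypothetical file name - use your own export
+
+payload = json.dumps({"prompt": workflow}).encode("utf-8")
+req = request.Request(
+    "http://127.0.0.1:8188/prompt",
+    data=payload,
+    headers={"Content-Type": "application/json"},
+)
+print(request.urlopen(req).read().decode("utf-8"))
+```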
+
+### Main Node Inputs
+
+- `input_image` - the image to be processed (the target image, analogous to "target image" in the SD WebUI extension);
+  - Supported Nodes: "Load Image", "Load Video" or any other nodes providing images as an output;
+- `source_image` - an image with a face or faces to swap into the `input_image` (the source image, analogous to "source image" in the SD WebUI extension);
+  - Supported Nodes: "Load Image" or any other nodes providing images as an output;
+- `face_model` - the input for the "Load Face Model" Node or another ReActor node providing a face model file (face embedding) you created earlier via the "Save Face Model" Node;
+  - Supported Nodes: "Load Face Model", "Build Blended Face Model";
+
+### Main Node Outputs
+
+- `IMAGE` - the output with the resulting image;
+  - Supported Nodes: any nodes which take images as an input;
+- `FACE_MODEL` - an output providing the source face model built during the swapping process;
+  - Supported Nodes: "Save Face Model", "ReActor", "Make Face Model Batch";
+
+### Face Restoration
+
+Since version 0.3.0 the ReActor Node has built-in face restoration.<br>Just download the models you want (see the [Installation](#installation) instructions) and select one of them to restore the resulting face(s) during the face swap. It will enhance face details and make your result more accurate.
+
+### Face Indexes
+
+By default, ReActor detects faces in images from "large" to "small".<br>You can change this behavior by adding the ReActorFaceSwapOpt node together with ReActorOptions.
+
+If you need to specify particular faces, you can set indexes for the source and input images.
+
+The index of the first detected face is 0.
+
+You can set indexes in the order you need.<br>
+E.g.: 0,1,2 (for Source); 1,0,2 (for Input).<br>This means: the second Input face (index = 1) will be swapped with the first Source face (index = 0) and so on.
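+
+As a minimal illustration of how these comma-separated strings pair faces (a sketch for clarity, not ReActor's actual code):
+
+```python
+# Sketch: pair source face indexes with input (target) face indexes.
+def pair_faces(source_indexes: str, input_indexes: str):
+    src = [int(i) for i in source_indexes.split(",")]
+    dst = [int(i) for i in input_indexes.split(",")]
+    # Position by position: each input face gets the source face listed
+    # at the same position in the source string.
+    return list(zip(src, dst))
+
+# With "0,1,2" (Source) and "1,0,2" (Input):
+# input face 1 <- source face 0, input face 0 <- source face 1, input face 2 <- source face 2
+print(pair_faces("0,1,2", "1,0,2"))  # [(0, 1), (1, 0), (2, 2)]
+```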
+
+### Genders
+
+You can specify the gender to detect in images.<br>
+ReActor will swap a face only if it meets the given condition.
+
+### Face Models
+
+Since version 0.4.0 you can save face models as "safetensors" files (stored in `ComfyUI\models\reactor\faces`) and load them into ReActor, letting you implement different scenarios and keep super-lightweight face models of the faces you use.
+
+To make new models appear in the list of the "Load Face Model" Node - just refresh the page of your ComfyUI web application.<br>
+(I recommend using ComfyUI Manager - otherwise your workflow can be lost after you refresh the page if you didn't save it beforehand.)
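+
+A saved face model is just a small safetensors file holding face-embedding tensors. If you are curious what is inside one, here is a minimal inspection sketch (it assumes the `safetensors` package is installed; the tensor names printed are whatever ReActor stored, nothing this sketch defines):
+
+```python
+# Sketch: list the tensors stored in a saved ReActor face model file.
+from safetensors.torch import load_file
+
+# Hypothetical file name - point this at one of your own saved models.
+face_model = load_file("ComfyUI/models/reactor/faces/my_face.safetensors")
+for name, tensor in face_model.items():
+    print(name, tuple(tensor.shape), tensor.dtype)
+```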
+
+## Troubleshooting
+
+<a name="insightfacebuild">
+
+### **I. (For Windows users) If you still cannot build Insightface for some reason or just don't want to install Visual Studio or VS C++ Build Tools - do the following:**
+
+1. (ComfyUI Portable) From the root folder check the version of Python:<br>run CMD and type `python_embeded\python.exe -V`
+2. Download the prebuilt Insightface package [for Python 3.10](https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp310-cp310-win_amd64.whl) or [for Python 3.11](https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp311-cp311-win_amd64.whl) (if in the previous step you see 3.11) or [for Python 3.12](https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp312-cp312-win_amd64.whl) (if in the previous step you see 3.12) and put it into the stable-diffusion-webui (A1111 or SD.Next) root folder (where you have the "webui-user.bat" file) or into the ComfyUI root folder if you use ComfyUI Portable
+3. From the root folder run:
+ - (SD WebUI) CMD and `.\venv\Scripts\activate`
+ - (ComfyUI Portable) run CMD
+4. Then update your PIP:
+ - (SD WebUI) `python -m pip install -U pip`
+ - (ComfyUI Portable) `python_embeded\python.exe -m pip install -U pip`
+5. Then install Insightface:
+ - (SD WebUI) `pip install insightface-0.7.3-cp310-cp310-win_amd64.whl` (for 3.10) or `pip install insightface-0.7.3-cp311-cp311-win_amd64.whl` (for 3.11) or `pip install insightface-0.7.3-cp312-cp312-win_amd64.whl` (for 3.12)
+ - (ComfyUI Portable) `python_embeded\python.exe -m pip install insightface-0.7.3-cp310-cp310-win_amd64.whl` (for 3.10) or `python_embeded\python.exe -m pip install insightface-0.7.3-cp311-cp311-win_amd64.whl` (for 3.11) or `python_embeded\python.exe -m pip install insightface-0.7.3-cp312-cp312-win_amd64.whl` (for 3.12)
+6. Enjoy!
+
+### **II. "AttributeError: 'NoneType' object has no attribute 'get'"**
+
+This error may occur if there's something wrong with the model file `inswapper_128.onnx`
+
+Try to download it manually from [here](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx)
+and put it into `ComfyUI\models\insightface`, replacing the existing one
+
+### **III. "reactor.execute() got an unexpected keyword argument 'reference_image'"**
+
+This means that the input points have been changed by the latest update<br>
+Remove the current ReActor Node from your workflow and add it again
+
+### **IV. ControlNet Aux Node IMPORT failed error when used with the ReActor Node**
+
+1. Close ComfyUI if it runs
+2. Go to the ComfyUI root folder, open CMD there and run:
+ - `python_embeded\python.exe -m pip uninstall -y opencv-python opencv-contrib-python opencv-python-headless`
+ - `python_embeded\python.exe -m pip install opencv-python==4.7.0.72`
+3. That's it!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/reactor-w-controlnet.png?raw=true" alt="reactor+controlnet" />
+
+### **V. "ModuleNotFoundError: No module named 'basicsr'" or "subprocess-exited-with-error" during future-0.18.3 installation**
+
+- Download https://github.com/Gourieff/Assets/raw/main/comfyui-reactor-node/future-0.18.3-py3-none-any.whl<br>
+- Put it into the ComfyUI root folder and run:
+
+ python_embeded\python.exe -m pip install future-0.18.3-py3-none-any.whl
+
+- Then:
+
+ python_embeded\python.exe -m pip install basicsr
+
+### **VI. "fatal: fetch-pack: invalid index-pack output" when you try to `git clone` the repository**
+
+Try to clone with `--depth=1` (last commit only):
+
+ git clone --depth=1 https://github.com/Gourieff/ComfyUI-ReActor
+
+Then retrieve the rest (if you need it):
+
+ git fetch --unshallow
+
+## Updating
+
+Just put the .bat or .sh script from this [Repo](https://github.com/Gourieff/sd-webui-extensions-updater) into the `ComfyUI\custom_nodes` directory and run it whenever you need to check for updates
+
+### Disclaimer
+
+This software is meant to be a productive contribution to the rapidly growing AI-generated media industry. It will help artists with tasks such as animating a custom character or using the character as a model for clothing, etc.
+
+The developers of this software are aware of its possible unethical applications and are committed to taking preventative measures against them. We will continue to develop this project in a positive direction while adhering to the law and ethics.
+
+Users of this software are expected to use it responsibly and in compliance with local law. If the face of a real person is used, users are advised to obtain consent from that person and to clearly disclose that the content is a deepfake when posting it online. **Developers and Contributors of this software are not responsible for the actions of end-users.**
+
+By using this extension you agree not to create any content that:
+- violates any laws;
+- causes any harm to a person or persons;
+- propagates (spreads) any information (public or personal) or images (public or personal) that could be intended to cause harm;
+- spreads misinformation;
+- targets vulnerable groups of people.
+
+This software utilizes the pre-trained models `buffalo_l` and `inswapper_128.onnx`, which are provided by [InsightFace](https://github.com/deepinsight/insightface/). These models are included under the following conditions:
+
+[From the insightface license](https://github.com/deepinsight/insightface/tree/master/python-package): InsightFace’s pre-trained models are available for non-commercial research purposes only. This includes both auto-downloaded models and manually downloaded models.
+
+Users of this software must strictly adhere to these conditions of use. The developers and maintainers of this software are not responsible for any misuse of InsightFace’s pre-trained models.
+
+Please note that if you intend to use this software for any commercial purposes, you will need to train your own models or find models that can be used commercially.
+
+### Models Hashsum
+
+#### Safe-to-use models have the following hashes:
+
+inswapper_128.onnx
+```
+MD5:a3a155b90354160350efd66fed6b3d80
+SHA256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
+```
+
+1k3d68.onnx
+
+```
+MD5:6fb94fcdb0055e3638bf9158e6a108f4
+SHA256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+```
+
+2d106det.onnx
+
+```
+MD5:a3613ef9eb3662b4ef88eb90db1fcf26
+SHA256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+```
+
+det_10g.onnx
+
+```
+MD5:4c10eef5c9e168357a16fdd580fa8371
+SHA256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+```
+
+genderage.onnx
+
+```
+MD5:81c77ba87ab38163b0dec6b26f8e2af2
+SHA256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+```
+
+w600k_r50.onnx
+
+```
+MD5:80248d427976241cbd1343889ed132b3
+SHA256:4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43
+```
+
+**Please check hashsums if you download these models from unverified (or untrusted) sources**
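+
+For example, you can compute both checksums of a downloaded file and compare them with the values above (a small sketch; the path is a placeholder - point it to wherever the model actually is):
+
+```
+# A sketch: compute MD5 and SHA256 of a downloaded model file.
+# The path below is a placeholder.
+import hashlib
+
+path = r"ComfyUI\models\insightface\inswapper_128.onnx"
+md5, sha256 = hashlib.md5(), hashlib.sha256()
+with open(path, "rb") as f:
+    for chunk in iter(lambda: f.read(1 << 20), b""):
+        md5.update(chunk)
+        sha256.update(chunk)
+print("MD5:", md5.hexdigest())
+print("SHA256:", sha256.hexdigest())
+```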
+
+<a name="credits">
+
+## Thanks and Credits
+
+<details>
+ <summary><a>Click to expand</a></summary>
+
+<br>
+
+|file|source|license|
+|----|------|-------|
+|[buffalo_l.zip](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/buffalo_l.zip) | [DeepInsight](https://github.com/deepinsight/insightface) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [codeformer-v0.1.0.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/codeformer-v0.1.0.pth) | [sczhou](https://github.com/sczhou/CodeFormer) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [GFPGANv1.3.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/GFPGANv1.3.pth) | [TencentARC](https://github.com/TencentARC/GFPGAN) | ![license](https://img.shields.io/badge/license-Apache_2.0-green.svg) |
+| [GFPGANv1.4.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/GFPGANv1.4.pth) | [TencentARC](https://github.com/TencentARC/GFPGAN) | ![license](https://img.shields.io/badge/license-Apache_2.0-green.svg) |
+| [inswapper_128.onnx](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx) | [DeepInsight](https://github.com/deepinsight/insightface) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [inswapper_128_fp16.onnx](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128_fp16.onnx) | [Hillobar](https://github.com/Hillobar/Rope) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+
+[BasicSR](https://github.com/XPixelGroup/BasicSR) - [@XPixelGroup](https://github.com/XPixelGroup) <br>
+[facexlib](https://github.com/xinntao/facexlib) - [@xinntao](https://github.com/xinntao) <br>
+
+[@s0md3v](https://github.com/s0md3v), [@henryruhs](https://github.com/henryruhs) - the original Roop App <br>
+[@ssitu](https://github.com/ssitu) - the first version of [ComfyUI_roop](https://github.com/ssitu/ComfyUI_roop) extension
+
+</details>
+
+<a name="note">
+
+### Note!
+
+**If you encounter any errors when you use the ReActor Node - don't rush to open an issue; first try to remove the current ReActor node from your workflow and add it again**
+
+**ReActor Node gets updates from time to time; new functions appear, and an old node may work with errors or not work at all**
diff --git a/README_RU.md b/README_RU.md
new file mode 100644
index 0000000..2100e63
--- /dev/null
+++ b/README_RU.md
@@ -0,0 +1,471 @@
+<div align="center">
+
+ <img src="https://github.com/Gourieff/Assets/raw/main/sd-webui-reactor/ReActor_logo_NEW_RU.png?raw=true" alt="logo" width="180px"/>
+
+ ![Version](https://img.shields.io/badge/версия_нода-0.5.2_alpha2-lightgreen?style=for-the-badge&labelColor=darkgreen)
+
+ <!--<sup>
+ <font color=brightred>
+
+ ## !!! [Важные изменения](#latestupdate) !!!<br>Не забудьте добавить Нод заново в существующие воркфлоу
+
+ </font>
+ </sup>-->
+
+ <a href="https://boosty.to/artgourieff" target="_blank">
+ <img src="https://lovemet.ru/img/boosty.jpg" width="108" alt="Поддержать проект на Boosty"/>
+ <br>
+ <sup>
+ Поддержать проект
+ </sup>
+ </a>
+
+ <hr>
+
+ [![Commit activity](https://img.shields.io/github/commit-activity/t/Gourieff/ComfyUI-ReActor/main?cacheSeconds=0)](https://github.com/Gourieff/ComfyUI-ReActor/commits/main)
+ ![Last commit](https://img.shields.io/github/last-commit/Gourieff/ComfyUI-ReActor/main?cacheSeconds=0)
+ [![Opened issues](https://img.shields.io/github/issues/Gourieff/ComfyUI-ReActor?color=red)](https://github.com/Gourieff/ComfyUI-ReActor/issues?cacheSeconds=0)
+ [![Closed issues](https://img.shields.io/github/issues-closed/Gourieff/ComfyUI-ReActor?color=green&cacheSeconds=0)](https://github.com/Gourieff/ComfyUI-ReActor/issues?q=is%3Aissue+is%3Aclosed)
+ ![License](https://img.shields.io/github/license/Gourieff/ComfyUI-ReActor)
+
+ [English](/README.md) | Русский
+
+# ReActor Node для ComfyUI
+
+</div>
+
+### Нод (node) для быстрой и простой замены лиц на любых изображениях для работы с ComfyUI, основан на [ReActor](https://github.com/Gourieff/sd-webui-reactor) SD-WebUI Face Swap Extension
+
+> Используя данный Нод, вы принимаете [ответственность](#disclaimer)
+
+<div align="center">
+
+---
+[**Что нового**](#latestupdate) | [**Установка**](#installation) | [**Использование**](#usage) | [**Устранение проблем**](#troubleshooting) | [**Обновление**](#updating) | [**Ответственность**](#disclaimer) | [**Благодарности**](#credits) | [**Заметка**](#note)
+
+---
+
+</div>
+
+<div align="center">
+ <img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/demo.gif?raw=true" alt="logo" width="100%"/>
+</div>
+
+<a name="latestupdate">
+
+## Что нового в последнем обновлении
+
+### 0.5.2 <sub><sup>ALPHA1</sup></sub>
+
+- Новый нод "Unload ReActor Models" - полезен для сложных воркфлоу, когда вам нужно освободить ОЗУ, занятую РеАктором
+- Поддержка ORT CoreML и ROCM EPs, достаточно установить ту версию onnxruntime, которая соответствует вашему GPU
+- Некоторые улучшения скрипта установки для поддержки последней версии ORT-GPU
+
+<details>
+ <summary><a>Предыдущие версии</a></summary>
+
+### 0.5.1
+
+- Поддержка моделей восстановления лиц GPEN 1024/2048 (доступны в датасете на HF https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models)
+- Нод ReActorFaceBoost - попытка улучшить качество заменённых лиц. Идея состоит в том, чтобы восстановить и увеличить заменённое лицо (в соответствии с параметром `face_size` модели реставрации) ДО того, как лицо будет вставлено в целевое изображение (через алгоритмы инсваппера), больше информации [здесь (PR#321)](https://github.com/Gourieff/comfyui-reactor-node/pull/321)
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.1-whatsnew-01.jpg?raw=true" alt="0.5.1-whatsnew-01" width="100%"/>
+
+[Полноразмерное демо-превью](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.1-whatsnew-02.png)
+
+- Сортировка моделей лиц по алфавиту
+- Множество исправлений и улучшений
+
+### [0.5.0 <sub><sup>BETA4</sup></sub>](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.5.0)
+
+- Поддержка библиотеки Spandrel при работе с GFPGAN
+
+### 0.5.0 <sub><sup>BETA3</sup></sub>
+
+- Исправления: "RAM issue", "No detection" для MaskingHelper
+
+### 0.5.0 <sub><sup>BETA2</sup></sub>
+
+- Появилась возможность строить смешанные модели лиц из пачки уже имеющихся моделей - добавьте для этого нод "Make Face Model Batch" в свой воркфлоу и загрузите несколько моделей через ноды "Load Face Model"
+- Огромный буст производительности модуля анализа изображений! 10-кратный прирост скорости! Работа с видео теперь в удовольствие!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-05.png?raw=true" alt="0.5.0-whatsnew-05" width="100%"/>
+
+### 0.5.0 <sub><sup>BETA1</sup></sub>
+
+- Добавлен выход SWAPPED_FACE для нода Masking Helper
+- FIX: Удалён пустой A-канал на выходе IMAGE нода Masking Helper (вызывавший ошибки с некоторыми нодами)
+
+### 0.5.0 <sub><sup>ALPHA1</sup></sub>
+
+- Нод ReActorBuildFaceModel получил выход "face_model" для отправки совмещенной модели лиц непосредственно в основной Нод:
+
+Basic workflow [💾](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/workflows/ReActor--Build-Blended-Face-Model--v2.json)
+
+- Функция маски лица теперь доступна и в версии для Комфи, просто добавьте нод "ReActorMaskHelper" в воркфлоу и соедините узлы, как показано ниже:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-01.jpg?raw=true" alt="0.5.0-whatsnew-01" width="100%"/>
+
+Если модель "face_yolov8m.pt" у вас отсутствует - можете скачать её [отсюда](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/detection/bbox/face_yolov8m.pt) и положить в папку "ComfyUI\models\ultralytics\bbox"
+<br>
+То же самое и с ["sam_vit_b_01ec64.pth"](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/sams/sam_vit_b_01ec64.pth) - скачайте (если отсутствует) и положите в папку "ComfyUI\models\sams";
+
+Данный нод поможет вам получить куда более аккуратный результат при замене лиц:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-02.jpg?raw=true" alt="0.5.0-whatsnew-02" width="100%"/>
+
+- Нод ReActorImageDublicator - полезен тем, кто создает видео, помогает продублировать одиночное изображение в несколько копий, чтобы использовать их, к примеру, с VAE энкодером:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-03.jpg?raw=true" alt="0.5.0-whatsnew-03" width="100%"/>
+
+- ReActorFaceSwapOpt (упрощенная версия основного нода) + нод ReActorOptions для установки дополнительных опций, таких как (новая) "отдельный порядок лиц для input/source". Да! Теперь можно установить любой порядок "чтения" индекса лиц на изображении, в т.ч. от большего к меньшему (по умолчанию)!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.5.0-whatsnew-04.jpg?raw=true" alt="0.5.0-whatsnew-04" width="100%"/>
+
+- Небольшое улучшение скорости анализа целевых изображений (input)
+
+### [0.4.2](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.2)
+
+- Добавлена поддержка GPEN-BFR-512 и RestoreFormer_Plus_Plus моделей восстановления лиц
+
+Скачать можно здесь: https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models
+<br>Добавьте модели в папку `ComfyUI\models\facerestore_models`
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-04.jpg?raw=true" alt="0.4.2-whatsnew-04" width="100%"/>
+
+- По многочисленным просьбам появилась возможность строить смешанные модели лиц и в ComfyUI тоже и использовать их с нодом "Load Face Model" Node или в SD WebUI;
+
+Экспериментируйте и создавайте новые лица или совмещайте разные лица нужного вам персонажа, чтобы добиться лучшей точности и схожести с оригиналом!
+
+Достаточно добавить нод "Make Image Batch" (ImpactPack) на вход нового нода РеАктора и загрузить в пачку необходимые вам изображения для построения смешанной модели:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-01.jpg?raw=true" alt="0.4.2-whatsnew-01" width="100%"/>
+
+Пример результата (на основе лиц 4-х актрис создано новое лицо):
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.2-whatsnew-02.jpg?raw=true" alt="0.4.2-whatsnew-02" width="75%"/>
+
+Базовый воркфлоу [💾](https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/workflows/ReActor--Build-Blended-Face-Model--v1.json)
+
+### [0.4.1](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.1)
+
+- Поддержка CUDA 12 - не забудьте запустить (Windows) `install.bat` или (Linux/MacOS) `install.py` для используемого Python окружения или попробуйте установить ORT-GPU для CU12 вручную (https://onnxruntime.ai/docs/install/#install-onnx-runtime-gpu-cuda-12x)
+- Исправление Issue https://github.com/Gourieff/comfyui-reactor-node/issues/173
+
+- Отдельный Нод для восстановления лиц (FR https://github.com/Gourieff/comfyui-reactor-node/issues/191), располагается внутри меню ReActor (нод RestoreFace)
+- (Windows) Установка зависимостей теперь может быть выполнена в Python из PATH ОС
+- Разные исправления и улучшения
+
+- Face Restore Visibility и CodeFormer Weight (Fidelity) теперь доступны; не забудьте заново добавить Нод в ваших существующих воркфлоу
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.1-whatsnew-01.jpg?raw=true" alt="0.4.1-whatsnew-01" width="100%"/>
+
+### [0.4.0](https://github.com/Gourieff/comfyui-reactor-node/releases/tag/v0.4.0)
+
+- Вход "input_image" теперь идёт первым, это даёт возможность корректного байпаса, а также это правильно с точки зрения расположения входов (главный вход - первый);
+- Теперь можно сохранять модели лиц в качестве файлов "safetensors" (`ComfyUI\models\reactor\faces`) и загружать их в ReActor, реализуя разные сценарии использования, а также храня супер легкие модели лиц, которые вы чаще всего используете:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-01.jpg?raw=true" alt="0.4.0-whatsnew-01" width="100%"/>
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-02.jpg?raw=true" alt="0.4.0-whatsnew-02" width="100%"/>
+
+- Возможность сохранять модели лиц напрямую из изображения:
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/0.4.0-whatsnew-03.jpg?raw=true" alt="0.4.0-whatsnew-03" width="50%"/>
+
+- Оба входа опциональны, присоедините один из них в соответствии с вашим воркфлоу; если присоединены оба - вход `image` имеет приоритет.
+- Различные исправления, делающие это расширение лучше.
+
+Спасибо всем, кто находит ошибки, предлагает новые функции и поддерживает данный проект!
+
+</details>
+
+<a name="installation">
+
+## Установка
+
+<details>
+ <summary>SD WebUI: <a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/">AUTOMATIC1111</a> или <a href="https://github.com/vladmandic/automatic">SD.Next</a></summary>
+
+1. Закройте (остановите) SD-WebUI Сервер, если запущен
+2. (Для пользователей Windows):
+ - Установите [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Например, версию Community - этот шаг нужен для правильной компиляции библиотеки Insightface)
+ - ИЛИ только [VS C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/), выберите "Desktop Development with C++" в разделе "Workloads -> Desktop & Mobile"
+ - ИЛИ если же вы не хотите устанавливать что-либо из вышеуказанного - выполните [данные шаги (раздел. I)](#insightfacebuild)
+3. Перейдите в `extensions\sd-webui-comfyui\ComfyUI\custom_nodes`
+4. Откройте Консоль или Терминал и выполните `git clone https://github.com/Gourieff/ComfyUI-ReActor`
+5. Перейдите в корневую директорию SD WebUI, откройте Консоль или Терминал и выполните (для пользователей Windows)`.\venv\Scripts\activate` или (для пользователей Linux/MacOS)`venv/bin/activate`
+6. `python -m pip install -U pip`
+7. `cd extensions\sd-webui-comfyui\ComfyUI\custom_nodes\comfyui-reactor-node`
+8. `python install.py`
+9. Пожалуйста, дождитесь полного завершения установки
+10. (Начиная с версии 0.3.0) Скачайте дополнительные модели восстановления лиц (по ссылке ниже) и сохраните их в папку `extensions\sd-webui-comfyui\ComfyUI\models\facerestore_models`:<br>
+https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/facerestore_models
+11. Запустите SD WebUI и проверьте консоль на сообщение, что ReActor Node работает:
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/console_status_running.jpg?raw=true" alt="console_status_running" width="759"/>
+
+12. Перейдите во вкладку ComfyUI и найдите там ReActor Node внутри меню `ReActor` или через поиск:
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/webui-demo.png?raw=true" alt="webui-demo" width="100%"/>
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/search-demo.png?raw=true" alt="webui-demo" width="1043"/>
+
+</details>
+
+<details>
+ <summary>Портативная версия <a href="https://github.com/comfyanonymous/ComfyUI">ComfyUI</a> для Windows</summary>
+
+1. Сделайте следующее:
+ - Установите [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Например, версию Community - этот шаг нужен для правильной компиляции библиотеки Insightface)
+ - ИЛИ только [VS C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/), выберите "Desktop Development with C++" в разделе "Workloads -> Desktop & Mobile"
+ - ИЛИ если же вы не хотите устанавливать что-либо из вышеуказанного - выполните [данные шаги (раздел. I)](#insightfacebuild)
+2. Выберите из двух вариантов:
+ - (ComfyUI Manager) Откройте ComfyUI Manager, нажмите "Install Custom Nodes", введите "ReActor" в поле "Search" и далее нажмите "Install". После того, как ComfyUI завершит установку, перезагрузите сервер.
+ - (Вручную) Перейдите в `ComfyUI\custom_nodes`, откройте Консоль и выполните `git clone https://github.com/Gourieff/ComfyUI-ReActor`
+3. Перейдите `ComfyUI\custom_nodes\comfyui-reactor-node` и запустите `install.bat`, дождитесь окончания установки
+4. Если модель "face_yolov8m.pt" у вас отсутствует - можете скачать её [отсюда](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/detection/bbox/face_yolov8m.pt) и положить в папку "ComfyUI\models\ultralytics\bbox"
+<br>
+То же самое и с "Sams" моделями, скачайте одну или обе [отсюда](https://huggingface.co/datasets/Gourieff/ReActor/tree/main/models/sams) - и положите в папку "ComfyUI\models\sams"
+5. Запустите ComfyUI и найдите ReActor Node внутри меню `ReActor` или через поиск
+
+</details>
+
+<a name="usage">
+
+## Использование
+
+Вы можете найти ноды ReActor внутри меню `ReActor` или через поиск (достаточно ввести "ReActor" в поисковой строке)
+
+Список нодов:
+- ••• Main Nodes •••
+ - ReActorFaceSwap (Основной нод)
+ - ReActorFaceSwapOpt (Основной нод с доп. входом Options)
+ - ReActorOptions (Опции для ReActorFaceSwapOpt)
+ - ReActorFaceBoost (Нод Face Booster)
+ - ReActorMaskHelper (Masking Helper)
+- ••• Operations with Face Models •••
+ - ReActorSaveFaceModel (Save Face Model)
+ - ReActorLoadFaceModel (Load Face Model)
+ - ReActorBuildFaceModel (Build Blended Face Model)
+ - ReActorMakeFaceModelBatch (Make Face Model Batch)
+- ••• Additional Nodes •••
+ - ReActorRestoreFace (Face Restoration)
+ - ReActorImageDublicator (Dublicate one Image to Images List)
+ - ImageRGBA2RGB (Convert RGBA to RGB)
+
+Соедините все необходимые слоты (slots) и запустите очередь (queue).
+
+### Входы основного Нода
+
+- `input_image` - это изображение, на котором надо поменять лицо или лица (целевое изображение, аналог "target image" в версии для SD WebUI);
+ - Поддерживаемые ноды: "Load Image", "Load Video" или любые другие ноды предоставляющие изображение в качестве выхода;
+- `source_image` - это изображение с лицом или лицами для замены (изображение-источник, аналог "source image" в версии для SD WebUI);
+ - Поддерживаемые ноды: "Load Image" или любые другие ноды с выходом Image(s);
+- `face_model` - это вход для выхода с нода "Load Face Model" или другого нода ReActor для загрузки модели лица (face model или face embedding), которое вы создали ранее через нод "Save Face Model";
+ - Поддерживаемые ноды: "Load Face Model", "Build Blended Face Model";
+
+### Выходы основного Нода
+
+- `IMAGE` - выход с готовым изображением (результатом);
+ - Поддерживаемые ноды: любые ноды с изображением на входе;
+- `FACE_MODEL` - выход, предоставляющий модель лица, построенную в ходе замены;
+ - Поддерживаемые ноды: "Save Face Model", "ReActor", "Make Face Model Batch";
+
+### Восстановление лиц
+
+Начиная с версии 0.3.0 ReActor Node имеет встроенное восстановление лиц.<br>Скачайте нужные вам модели (см. инструкцию по [Установке](#installation)) и выберите одну из них, чтобы улучшить качество финального лица.
+
+### Индексы Лиц (Face Indexes)
+
+По умолчанию ReActor определяет лица на изображении в порядке от "большого" к "малому".<br>Вы можете поменять эту опцию, используя нод ReActorFaceSwapOpt вместе с ReActorOptions.
+
+Если вам нужно заменить определенное лицо, вы можете указать индекс для исходного (source, с лицом) и входного (input, где будет замена лица) изображений.
+
+Индекс первого обнаруженного лица - 0.
+
+Вы можете задать индексы в том порядке, который вам нужен.<br>
+Например: 0,1,2 (для Source); 1,0,2 (для Input).<br>Это означает, что: второе лицо из Input (индекс = 1) будет заменено первым лицом из Source (индекс = 0) и так далее.
+
+### Определение Пола
+
+Вы можете обозначить, какой пол нужно определять на изображении.<br>
+ReActor заменит только то лицо, которое удовлетворяет заданному условию.
+
+### Модели Лиц
+Начиная с версии 0.4.0, вы можете сохранять модели лиц как файлы "safetensors" (хранятся в папке `ComfyUI\models\reactor\faces`) и загружать их в ReActor, реализуя разные сценарии использования, а также храня супер легкие модели лиц, которые вы чаще всего используете.
+
+Чтобы новые модели появились в списке моделей нода "Load Face Model" - обновите страницу вашего веб-приложения ComfyUI.<br>
+(Рекомендую использовать ComfyUI Manager - иначе ваше воркфлоу может быть потеряно после перезагрузки страницы, если вы не сохранили его).
+
+<a name="troubleshooting">
+
+## Устранение проблем
+
+<a name="insightfacebuild">
+
+### **I. (Для пользователей Windows) Если вы до сих пор не можете установить пакет Insightface по каким-то причинам или же просто не желаете устанавливать Visual Studio или VS C++ Build Tools - сделайте следующее:**
+
+1. (ComfyUI Portable) Находясь в корневой директории, проверьте версию Python:<br>запустите CMD и выполните `python_embeded\python.exe -V`<br>Вы должны увидеть версию или 3.10, или 3.11, или 3.12
+2. Скачайте готовый пакет Insightface [для версии 3.10](https://github.com/Gourieff/sd-webui-reactor/raw/main/example/insightface-0.7.3-cp310-cp310-win_amd64.whl) или [для 3.11](https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp311-cp311-win_amd64.whl) (если на предыдущем шаге вы увидели 3.11) или [для 3.12](https://github.com/Gourieff/Assets/raw/main/Insightface/insightface-0.7.3-cp312-cp312-win_amd64.whl) (если на предыдущем шаге вы увидели 3.12) и сохраните его в корневую директорию stable-diffusion-webui (A1111 или SD.Next) - туда, где лежит файл "webui-user.bat" -ИЛИ- в корневую директорию ComfyUI, если вы используете ComfyUI Portable
+3. Из корневой директории запустите:
+ - (SD WebUI) CMD и `.\venv\Scripts\activate`
+ - (ComfyUI Portable) CMD
+4. Обновите PIP:
+ - (SD WebUI) `python -m pip install -U pip`
+ - (ComfyUI Portable) `python_embeded\python.exe -m pip install -U pip`
+5. Затем установите Insightface:
+ - (SD WebUI) `pip install insightface-0.7.3-cp310-cp310-win_amd64.whl` (для 3.10) или `pip install insightface-0.7.3-cp311-cp311-win_amd64.whl` (для 3.11) или `pip install insightface-0.7.3-cp312-cp312-win_amd64.whl` (для 3.12)
+ - (ComfyUI Portable) `python_embeded\python.exe -m pip install insightface-0.7.3-cp310-cp310-win_amd64.whl` (для 3.10) или `python_embeded\python.exe -m pip install insightface-0.7.3-cp311-cp311-win_amd64.whl` (для 3.11) или `python_embeded\python.exe -m pip install insightface-0.7.3-cp312-cp312-win_amd64.whl` (для 3.12)
+6. Готово!
+
+### **II. "AttributeError: 'NoneType' object has no attribute 'get'"**
+
+Эта ошибка появляется, если что-то не так с файлом модели `inswapper_128.onnx`
+
+Скачайте вручную по ссылке [отсюда](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx)
+и сохраните в директорию `ComfyUI\models\insightface`, заменив имеющийся файл
+
+### **III. "reactor.execute() got an unexpected keyword argument 'reference_image'"**
+
+Это означает, что поменялось обозначение входных точек (input points) в связи с последним обновлением<br>
+Удалите из вашего рабочего пространства имеющийся ReActor Node и добавьте его снова
+
+### **IV. ControlNet Aux Node IMPORT failed - при использовании совместно с нодом ReActor**
+
+1. Закройте или остановите ComfyUI сервер, если он запущен
+2. Перейдите в корневую папку ComfyUI, откройте консоль CMD и выполните следующее:
+ - `python_embeded\python.exe -m pip uninstall -y opencv-python opencv-contrib-python opencv-python-headless`
+ - `python_embeded\python.exe -m pip install opencv-python==4.7.0.72`
+3. Готово!
+
+<img src="https://github.com/Gourieff/Assets/blob/main/comfyui-reactor-node/uploads/reactor-w-controlnet.png?raw=true" alt="reactor+controlnet" />
+
+### **V. "ModuleNotFoundError: No module named 'basicsr'" или "subprocess-exited-with-error" при установке пакета future-0.18.3**
+
+- Скачайте https://github.com/Gourieff/Assets/raw/main/comfyui-reactor-node/future-0.18.3-py3-none-any.whl<br>
+- Скопируйте файл в корневую папку ComfyUI и выполните в консоли:
+
+ python_embeded\python.exe -m pip install future-0.18.3-py3-none-any.whl
+
+- Затем:
+
+ python_embeded\python.exe -m pip install basicsr
+
+### **VI. "fatal: fetch-pack: invalid index-pack output" при исполнении команды `git clone`**
+
+Попробуйте клонировать репозиторий с параметром `--depth=1` (только последний коммит):
+
+ git clone --depth=1 https://github.com/Gourieff/ComfyUI-ReActor
+
+Затем вытяните оставшееся (если требуется):
+
+ git fetch --unshallow
+
+<a name="updating">
+
+## Обновление
+
+Положите .bat или .sh скрипт из [данного репозитория](https://github.com/Gourieff/sd-webui-extensions-updater) в папку `ComfyUI\custom_nodes` и запустите, когда желаете обновить ComfyUI и Ноды
+
+<a name="disclaimer">
+
+## Ответственность
+
+Это программное обеспечение призвано стать продуктивным вкладом в быстрорастущую медиаиндустрию на основе генеративных сетей и искусственного интеллекта. Данное ПО поможет художникам в решении таких задач, как анимация собственного персонажа или использование персонажа в качестве модели для одежды и т.д.
+
+Разработчики этого программного обеспечения осведомлены о возможных неэтичных применениях и обязуются принять против этого превентивные меры. Мы продолжим развивать этот проект в позитивном направлении, придерживаясь закона и этики.
+
+Подразумевается, что пользователи этого программного обеспечения будут использовать его ответственно, соблюдая локальное законодательство. Если используется лицо реального человека, пользователь обязан получить согласие заинтересованного лица и четко указать, что это дипфейк при размещении контента в Интернете. **Разработчики и Со-авторы данного программного обеспечения не несут ответственности за действия конечных пользователей.**
+
+Используя данное расширение, вы соглашаетесь не создавать материалы, которые:
+- нарушают какие-либо действующие законы тех или иных государств или международных организаций;
+- причиняют какой-либо вред человеку или лицам;
+- пропагандируют любую информацию (как общедоступную, так и личную) или изображения (как общедоступные, так и личные), которые могут быть направлены на причинение вреда;
+- используются для распространения дезинформации;
+- нацелены на уязвимые группы людей.
+
+Данное программное обеспечение использует предварительно обученные модели `buffalo_l` и `inswapper_128.onnx`, представленные разработчиками [InsightFace](https://github.com/deepinsight/insightface/). Эти модели распространяются при следующих условиях:
+
+[Перевод из текста лицензии insighface](https://github.com/deepinsight/insightface/tree/master/python-package): Предварительно обученные модели InsightFace доступны только для некоммерческих исследовательских целей. Сюда входят как модели с автоматической загрузкой, так и модели, загруженные вручную.
+
+Пользователи данного программного обеспечения должны строго соблюдать данные условия использования. Разработчики и Со-авторы данного программного продукта не несут ответственности за неправильное использование предварительно обученных моделей InsightFace.
+
+Обратите внимание: если вы собираетесь использовать это программное обеспечение в каких-либо коммерческих целях, вам необходимо будет обучить свои собственные модели или найти модели, которые можно использовать в коммерческих целях.
+
+### Хэш файлов моделей
+
+#### Безопасные для использования модели имеют следующий хэш:
+
+inswapper_128.onnx
+```
+MD5:a3a155b90354160350efd66fed6b3d80
+SHA256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
+```
+
+1k3d68.onnx
+
+```
+MD5:6fb94fcdb0055e3638bf9158e6a108f4
+SHA256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+```
+
+2d106det.onnx
+
+```
+MD5:a3613ef9eb3662b4ef88eb90db1fcf26
+SHA256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+```
+
+det_10g.onnx
+
+```
+MD5:4c10eef5c9e168357a16fdd580fa8371
+SHA256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+```
+
+genderage.onnx
+
+```
+MD5:81c77ba87ab38163b0dec6b26f8e2af2
+SHA256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+```
+
+w600k_r50.onnx
+
+```
+MD5:80248d427976241cbd1343889ed132b3
+SHA256:4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43
+```
+
+**Пожалуйста, сравните хэш, если вы скачиваете данные модели из непроверенных источников**
+
+<a name="credits">
+
+## Благодарности и авторы компонентов
+
+<details>
+ <summary><a>Нажмите, чтобы посмотреть</a></summary>
+
+<br>
+
+|файл|источник|лицензия|
+|----|--------|--------|
+|[buffalo_l.zip](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/buffalo_l.zip) | [DeepInsight](https://github.com/deepinsight/insightface) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [codeformer-v0.1.0.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/codeformer-v0.1.0.pth) | [sczhou](https://github.com/sczhou/CodeFormer) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [GFPGANv1.3.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/GFPGANv1.3.pth) | [TencentARC](https://github.com/TencentARC/GFPGAN) | ![license](https://img.shields.io/badge/license-Apache_2.0-green.svg) |
+| [GFPGANv1.4.pth](https://huggingface.co/datasets/Gourieff/ReActor/blob/main/models/facerestore_models/GFPGANv1.4.pth) | [TencentARC](https://github.com/TencentARC/GFPGAN) | ![license](https://img.shields.io/badge/license-Apache_2.0-green.svg) |
+| [inswapper_128.onnx](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128.onnx) | [DeepInsight](https://github.com/deepinsight/insightface) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+| [inswapper_128_fp16.onnx](https://github.com/facefusion/facefusion-assets/releases/download/models/inswapper_128_fp16.onnx) | [Hillobar](https://github.com/Hillobar/Rope) | ![license](https://img.shields.io/badge/license-non_commercial-red) |
+
+[BasicSR](https://github.com/XPixelGroup/BasicSR) - [@XPixelGroup](https://github.com/XPixelGroup) <br>
+[facexlib](https://github.com/xinntao/facexlib) - [@xinntao](https://github.com/xinntao) <br>
+
+[@s0md3v](https://github.com/s0md3v), [@henryruhs](https://github.com/henryruhs) - оригинальное приложение Roop <br>
+[@ssitu](https://github.com/ssitu) - первая версия расширения с поддержкой ComfyUI [ComfyUI_roop](https://github.com/ssitu/ComfyUI_roop)
+
+</details>
+
+<a name="note">
+
+### Обратите внимание!
+
+**Если у вас возникли какие-либо ошибки при очередном использовании Нода ReActor - не торопитесь открывать Issue, для начала попробуйте удалить текущий Нод из вашего рабочего пространства и добавить его снова**
+
+**ReActor Node периодически получает обновления, появляются новые функции, из-за чего имеющийся Нод может работать с ошибками или не работать вовсе**
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..3f8982d
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,39 @@
+import sys
+import os
+
+repo_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, repo_dir)
+original_modules = sys.modules.copy()
+
+# Place aside existing modules if using a1111 web ui
+modules_used = [
+ "modules",
+ "modules.images",
+ "modules.processing",
+ "modules.scripts_postprocessing",
+ "modules.scripts",
+ "modules.shared",
+]
+original_webui_modules = {}
+for module in modules_used:
+ if module in sys.modules:
+ original_webui_modules[module] = sys.modules.pop(module)
+
+# Proceed with node setup
+from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
+
+__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
+
+# Clean up imports
+# Remove repo directory from path
+sys.path.remove(repo_dir)
+# Remove any new modules
+modules_to_remove = []
+for module in sys.modules:
+ if module not in original_modules and not module.startswith("google.protobuf") and not module.startswith("onnx") and not module.startswith("cv2"):
+ modules_to_remove.append(module)
+for module in modules_to_remove:
+ del sys.modules[module]
+
+# Restore original modules
+sys.modules.update(original_webui_modules)
diff --git a/install.bat b/install.bat
new file mode 100644
index 0000000..6ddc7c0
--- /dev/null
+++ b/install.bat
@@ -0,0 +1,37 @@
+@echo off
+setlocal enabledelayedexpansion
+
+:: Try to use embedded python first
+if exist ..\..\..\python_embeded\python.exe (
+ :: Use the embedded python
+ set PYTHON=..\..\..\python_embeded\python.exe
+) else (
+ :: Embedded python not found, check for python in the PATH
+ for /f "tokens=* USEBACKQ" %%F in (`python --version 2^>^&1`) do (
+ set PYTHON_VERSION=%%F
+ )
+ if errorlevel 1 (
+ echo I couldn't find an embedded version of Python, nor one in the Windows PATH. Please install manually.
+ pause
+ exit /b 1
+ ) else (
+ :: Use python from the PATH (if it's the right version and the user agrees)
+ echo I couldn't find an embedded version of Python, but I did find !PYTHON_VERSION! in your Windows PATH.
+ echo Would you like to proceed with the install using that version? (Y/N^)
+ set /p USE_PYTHON=
+ if /i "!USE_PYTHON!"=="Y" (
+ set PYTHON=python
+ ) else (
+ echo Okay. Please install manually.
+ pause
+ exit /b 1
+ )
+ )
+)
+
+:: Install the package
+echo Installing...
+%PYTHON% install.py
+echo Done^!
+
+@pause \ No newline at end of file
diff --git a/install.py b/install.py
new file mode 100644
index 0000000..bf4f6f2
--- /dev/null
+++ b/install.py
@@ -0,0 +1,104 @@
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+import subprocess
+import os, sys
+try:
+ from pkg_resources import get_distribution as distributions
+except:
+ from importlib_metadata import distributions
+from tqdm import tqdm
+import urllib.request
+from packaging import version as pv
+try:
+ from folder_paths import models_dir
+except:
+ from pathlib import Path
+ models_dir = os.path.join(Path(__file__).parents[2], "models")
+
+sys.path.append(os.path.dirname(os.path.realpath(__file__)))
+
+req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
+
+model_url = "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/inswapper_128.onnx"
+model_name = os.path.basename(model_url)
+models_dir_path = os.path.join(models_dir, "insightface")
+model_path = os.path.join(models_dir_path, model_name)
+
+def run_pip(*args):
+ subprocess.run([sys.executable, "-m", "pip", "install", "--no-warn-script-location", *args])
+
+def is_installed (
+ package: str, version: str = None, strict: bool = True
+):
+ has_package = None
+ try:
+ has_package = distributions(package)
+ if has_package is not None:
+ if version is not None:
+ installed_version = has_package.version
+ if (installed_version != version and strict == True) or (pv.parse(installed_version) < pv.parse(version) and strict == False):
+ return False
+ else:
+ return True
+ else:
+ return True
+ else:
+ return False
+ except Exception as e:
+ print(f"Status: {e}")
+ return False
+
+def download(url, path, name):
+ request = urllib.request.urlopen(url)
+ total = int(request.headers.get('Content-Length', 0))
+ with tqdm(total=total, desc=f'[ReActor] Downloading {name} to {path}', unit='B', unit_scale=True, unit_divisor=1024) as progress:
+ urllib.request.urlretrieve(url, path, reporthook=lambda count, block_size, total_size: progress.update(block_size))
+
+if not os.path.exists(models_dir_path):
+ os.makedirs(models_dir_path)
+
+if not os.path.exists(model_path):
+ download(model_url, model_path, model_name)
+
+with open(req_file) as file:
+ try:
+ ort = "onnxruntime-gpu"
+ import torch
+ cuda_version = None
+ if torch.cuda.is_available():
+ cuda_version = torch.version.cuda
+ print(f"CUDA {cuda_version}")
+ elif torch.backends.mps.is_available() or hasattr(torch,'dml') or hasattr(torch,'privateuseone'):
+ ort = "onnxruntime"
+ if cuda_version is not None and float(cuda_version)>=12 and torch.torch_version.__version__ <= "2.2.0": # CU12.x and torch<=2.2.0
+ print(f"Torch: {torch.torch_version.__version__}")
+ if not is_installed(ort,"1.17.0",False):
+ run_pip(ort,"--extra-index-url", "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/")
+ elif cuda_version is not None and float(cuda_version)>=12 and torch.torch_version.__version__ >= "2.4.0" : # CU12.x and latest torch
+ print(f"Torch: {torch.torch_version.__version__}")
+ if not is_installed(ort,"1.20.1",False): # latest ort-gpu
+ run_pip(ort,"-U")
+ elif not is_installed(ort,"1.16.1",False):
+ run_pip(ort, "-U")
+ except Exception as e:
+ print(e)
+ print(f"Warning: Failed to install {ort}, ReActor will not work.")
+ raise e
+ strict = True
+ for package in file:
+ package_version = None
+ try:
+ package = package.strip()
+ if "==" in package:
+ package_version = package.split('==')[1]
+ elif ">=" in package:
+ package_version = package.split('>=')[1]
+ strict = False
+ if not is_installed(package,package_version,strict):
+ run_pip(package)
+ except Exception as e:
+ print(e)
+ print(f"Warning: Failed to install {package}, ReActor will not work.")
+ raise e
+print("Ok")
diff --git a/nodes.py b/nodes.py
new file mode 100644
index 0000000..805863d
--- /dev/null
+++ b/nodes.py
@@ -0,0 +1,1237 @@
+import os, glob, sys
+import logging
+
+import torch
+import torch.nn.functional as torchfn
+from torchvision.transforms.functional import normalize
+from torchvision.ops import masks_to_boxes
+
+import numpy as np
+import cv2
+import math
+from typing import List
+from PIL import Image
+from scipy import stats
+from insightface.app.common import Face
+from segment_anything import sam_model_registry
+
+from modules.processing import StableDiffusionProcessingImg2Img
+from modules.shared import state
+# from comfy_extras.chainner_models import model_loading
+import comfy.model_management as model_management
+import comfy.utils
+import folder_paths
+
+import scripts.reactor_version
+from r_chainner import model_loading
+from scripts.reactor_faceswap import (
+ FaceSwapScript,
+ get_models,
+ get_current_faces_model,
+ analyze_faces,
+ half_det_size,
+ providers
+)
+from scripts.reactor_swapper import (
+ unload_all_models,
+)
+from scripts.reactor_logger import logger
+from reactor_utils import (
+ batch_tensor_to_pil,
+ batched_pil_to_tensor,
+ tensor_to_pil,
+ img2tensor,
+ tensor2img,
+ save_face_model,
+ load_face_model,
+ download,
+ set_ort_session,
+ prepare_cropped_face,
+ normalize_cropped_face,
+ add_folder_path_and_extensions,
+ rgba2rgb_tensor
+)
+from reactor_patcher import apply_patch
+from r_facelib.utils.face_restoration_helper import FaceRestoreHelper
+from r_basicsr.utils.registry import ARCH_REGISTRY
+import scripts.r_archs.codeformer_arch
+import scripts.r_masking.subcore as subcore
+import scripts.r_masking.core as core
+import scripts.r_masking.segs as masking_segs
+
+
+models_dir = folder_paths.models_dir
+REACTOR_MODELS_PATH = os.path.join(models_dir, "reactor")
+FACE_MODELS_PATH = os.path.join(REACTOR_MODELS_PATH, "faces")
+
+if not os.path.exists(REACTOR_MODELS_PATH):
+ os.makedirs(REACTOR_MODELS_PATH)
+ if not os.path.exists(FACE_MODELS_PATH):
+ os.makedirs(FACE_MODELS_PATH)
+
+dir_facerestore_models = os.path.join(models_dir, "facerestore_models")
+os.makedirs(dir_facerestore_models, exist_ok=True)
+folder_paths.folder_names_and_paths["facerestore_models"] = ([dir_facerestore_models], folder_paths.supported_pt_extensions)
+
+BLENDED_FACE_MODEL = None
+FACE_SIZE: int = 512
+FACE_HELPER = None
+
+if "ultralytics" not in folder_paths.folder_names_and_paths:
+ add_folder_path_and_extensions("ultralytics_bbox", [os.path.join(models_dir, "ultralytics", "bbox")], folder_paths.supported_pt_extensions)
+ add_folder_path_and_extensions("ultralytics_segm", [os.path.join(models_dir, "ultralytics", "segm")], folder_paths.supported_pt_extensions)
+ add_folder_path_and_extensions("ultralytics", [os.path.join(models_dir, "ultralytics")], folder_paths.supported_pt_extensions)
+if "sams" not in folder_paths.folder_names_and_paths:
+ add_folder_path_and_extensions("sams", [os.path.join(models_dir, "sams")], folder_paths.supported_pt_extensions)
+
+def get_facemodels():
+ models_path = os.path.join(FACE_MODELS_PATH, "*")
+ models = glob.glob(models_path)
+ models = [x for x in models if x.endswith(".safetensors")]
+ return models
+
+def get_restorers():
+ models_path = os.path.join(models_dir, "facerestore_models/*")
+ models = glob.glob(models_path)
+ models = [x for x in models if (x.endswith(".pth") or x.endswith(".onnx"))]
+ if len(models) == 0:
+ fr_urls = [
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/GFPGANv1.3.pth",
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/GFPGANv1.4.pth",
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/codeformer-v0.1.0.pth",
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/GPEN-BFR-512.onnx",
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/GPEN-BFR-1024.onnx",
+ "https://huggingface.co/datasets/Gourieff/ReActor/resolve/main/models/facerestore_models/GPEN-BFR-2048.onnx",
+ ]
+ for model_url in fr_urls:
+ model_name = os.path.basename(model_url)
+ model_path = os.path.join(dir_facerestore_models, model_name)
+ download(model_url, model_path, model_name)
+ models = glob.glob(models_path)
+ models = [x for x in models if (x.endswith(".pth") or x.endswith(".onnx"))]
+ return models
+
+def get_model_names(get_models):
+ models = get_models()
+ names = []
+ for x in models:
+ names.append(os.path.basename(x))
+ names.sort(key=str.lower)
+ names.insert(0, "none")
+ return names
+
+def model_names():
+ models = get_models()
+ return {os.path.basename(x): x for x in models}
+
+
+class reactor:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "enabled": ("BOOLEAN", {"default": True, "label_off": "OFF", "label_on": "ON"}),
+ "input_image": ("IMAGE",),
+ "swap_model": (list(model_names().keys()),),
+ "facedetection": (["retinaface_resnet50", "retinaface_mobile0.25", "YOLOv5l", "YOLOv5n"],),
+ "face_restore_model": (get_model_names(get_restorers),),
+ "face_restore_visibility": ("FLOAT", {"default": 1, "min": 0.1, "max": 1, "step": 0.05}),
+ "codeformer_weight": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.05}),
+ "detect_gender_input": (["no","female","male"], {"default": "no"}),
+ "detect_gender_source": (["no","female","male"], {"default": "no"}),
+ "input_faces_index": ("STRING", {"default": "0"}),
+ "source_faces_index": ("STRING", {"default": "0"}),
+ "console_log_level": ([0, 1, 2], {"default": 1}),
+ },
+ "optional": {
+ "source_image": ("IMAGE",),
+ "face_model": ("FACE_MODEL",),
+ "face_boost": ("FACE_BOOST",),
+ },
+ "hidden": {"faces_order": "FACES_ORDER"},
+ }
+
+ RETURN_TYPES = ("IMAGE","FACE_MODEL")
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def __init__(self):
+ # self.face_helper = None
+ self.faces_order = ["large-small", "large-small"]
+ # self.face_size = FACE_SIZE
+ self.face_boost_enabled = False
+ self.restore = True
+ self.boost_model = None
+ self.interpolation = "Bicubic"
+ self.boost_model_visibility = 1
+ self.boost_cf_weight = 0.5
+
+ def restore_face(
+ self,
+ input_image,
+ face_restore_model,
+ face_restore_visibility,
+ codeformer_weight,
+ facedetection,
+ ):
+
+ result = input_image
+
+ if face_restore_model != "none" and not model_management.processing_interrupted():
+
+ global FACE_SIZE, FACE_HELPER
+
+ self.face_helper = FACE_HELPER
+
+ faceSize = 512
+ if "1024" in face_restore_model.lower():
+ faceSize = 1024
+ elif "2048" in face_restore_model.lower():
+ faceSize = 2048
+
+ logger.status(f"Restoring with {face_restore_model} | Face Size is set to {faceSize}")
+
+ model_path = folder_paths.get_full_path("facerestore_models", face_restore_model)
+
+ device = model_management.get_torch_device()
+
+ if "codeformer" in face_restore_model.lower():
+
+ codeformer_net = ARCH_REGISTRY.get("CodeFormer")(
+ dim_embd=512,
+ codebook_size=1024,
+ n_head=8,
+ n_layers=9,
+ connect_list=["32", "64", "128", "256"],
+ ).to(device)
+ checkpoint = torch.load(model_path)["params_ema"]
+ codeformer_net.load_state_dict(checkpoint)
+ facerestore_model = codeformer_net.eval()
+
+ elif ".onnx" in face_restore_model:
+
+ ort_session = set_ort_session(model_path, providers=providers)
+ ort_session_inputs = {}
+ facerestore_model = ort_session
+
+ else:
+
+ sd = comfy.utils.load_torch_file(model_path, safe_load=True)
+ facerestore_model = model_loading.load_state_dict(sd).eval()
+ facerestore_model.to(device)
+
+ if faceSize != FACE_SIZE or self.face_helper is None:
+ self.face_helper = FaceRestoreHelper(1, face_size=faceSize, crop_ratio=(1, 1), det_model=facedetection, save_ext='png', use_parse=True, device=device)
+ FACE_SIZE = faceSize
+ FACE_HELPER = self.face_helper
+
+ image_np = 255. * result.numpy()
+
+ total_images = image_np.shape[0]
+
+ out_images = []
+
+ for i in range(total_images):
+
+ if total_images > 1:
+ logger.status(f"Restoring {i+1}")
+
+ cur_image_np = image_np[i,:, :, ::-1]
+
+ original_resolution = cur_image_np.shape[0:2]
+
+ if facerestore_model is None or self.face_helper is None:
+ return result
+
+ self.face_helper.clean_all()
+ self.face_helper.read_image(cur_image_np)
+ self.face_helper.get_face_landmarks_5(only_center_face=False, resize=640, eye_dist_threshold=5)
+ self.face_helper.align_warp_face()
+
+ restored_face = None
+
+ for idx, cropped_face in enumerate(self.face_helper.cropped_faces):
+
+ # if ".pth" in face_restore_model:
+ cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
+ normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
+ cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
+
+ try:
+
+ with torch.no_grad():
+
+ if ".onnx" in face_restore_model: # ONNX models
+
+ for ort_session_input in ort_session.get_inputs():
+ if ort_session_input.name == "input":
+ cropped_face_prep = prepare_cropped_face(cropped_face)
+ ort_session_inputs[ort_session_input.name] = cropped_face_prep
+ if ort_session_input.name == "weight":
+ weight = np.array([ 1 ], dtype = np.double)
+ ort_session_inputs[ort_session_input.name] = weight
+
+ output = ort_session.run(None, ort_session_inputs)[0][0]
+ restored_face = normalize_cropped_face(output)
+
+ else: # PTH models
+
+ output = facerestore_model(cropped_face_t, w=codeformer_weight)[0] if "codeformer" in face_restore_model.lower() else facerestore_model(cropped_face_t)[0]
+ restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
+
+ del output
+ torch.cuda.empty_cache()
+
+ except Exception as error:
+
+ print(f"\tFailed inference: {error}", file=sys.stderr)
+ restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
+
+ if face_restore_visibility < 1:
+ restored_face = cropped_face * (1 - face_restore_visibility) + restored_face * face_restore_visibility
+
+ restored_face = restored_face.astype("uint8")
+ self.face_helper.add_restored_face(restored_face)
+
+ self.face_helper.get_inverse_affine(None)
+
+ restored_img = self.face_helper.paste_faces_to_input_image()
+ restored_img = restored_img[:, :, ::-1]
+
+ if original_resolution != restored_img.shape[0:2]:
+ restored_img = cv2.resize(restored_img, (0, 0), fx=original_resolution[1]/restored_img.shape[1], fy=original_resolution[0]/restored_img.shape[0], interpolation=cv2.INTER_AREA)
+
+ self.face_helper.clean_all()
+
+ # out_images[i] = restored_img
+ out_images.append(restored_img)
+
+ if state.interrupted or model_management.processing_interrupted():
+ logger.status("Interrupted by User")
+ return input_image
+
+ restored_img_np = np.array(out_images).astype(np.float32) / 255.0
+ restored_img_tensor = torch.from_numpy(restored_img_np)
+
+ result = restored_img_tensor
+
+ return result
+
+ def execute(self, enabled, input_image, swap_model, detect_gender_source, detect_gender_input, source_faces_index, input_faces_index, console_log_level, face_restore_model,face_restore_visibility, codeformer_weight, facedetection, source_image=None, face_model=None, faces_order=None, face_boost=None):
+
+ if face_boost is not None:
+ self.face_boost_enabled = face_boost["enabled"]
+ self.boost_model = face_boost["boost_model"]
+ self.interpolation = face_boost["interpolation"]
+ self.boost_model_visibility = face_boost["visibility"]
+ self.boost_cf_weight = face_boost["codeformer_weight"]
+ self.restore = face_boost["restore_with_main_after"]
+ else:
+ self.face_boost_enabled = False
+
+ if faces_order is None:
+ faces_order = self.faces_order
+
+ apply_patch(console_log_level)
+
+ if not enabled:
+ return (input_image,face_model)
+ elif source_image is None and face_model is None:
+ logger.error("Please provide 'source_image' or `face_model`")
+ return (input_image,face_model)
+
+ if face_model == "none":
+ face_model = None
+
+ script = FaceSwapScript()
+ pil_images = batch_tensor_to_pil(input_image)
+ if source_image is not None:
+ source = tensor_to_pil(source_image)
+ else:
+ source = None
+ p = StableDiffusionProcessingImg2Img(pil_images)
+ script.process(
+ p=p,
+ img=source,
+ enable=True,
+ source_faces_index=source_faces_index,
+ faces_index=input_faces_index,
+ model=swap_model,
+ swap_in_source=True,
+ swap_in_generated=True,
+ gender_source=detect_gender_source,
+ gender_target=detect_gender_input,
+ face_model=face_model,
+ faces_order=faces_order,
+ # face boost:
+ face_boost_enabled=self.face_boost_enabled,
+ face_restore_model=self.boost_model,
+ face_restore_visibility=self.boost_model_visibility,
+ codeformer_weight=self.boost_cf_weight,
+ interpolation=self.interpolation,
+ )
+ result = batched_pil_to_tensor(p.init_images)
+
+ if face_model is None:
+ current_face_model = get_current_faces_model()
+ face_model_to_provide = current_face_model[0] if (current_face_model is not None and len(current_face_model) > 0) else face_model
+ else:
+ face_model_to_provide = face_model
+
+ if self.restore or not self.face_boost_enabled:
+ result = reactor.restore_face(self,result,face_restore_model,face_restore_visibility,codeformer_weight,facedetection)
+
+ return (result,face_model_to_provide)
+
+
+class ReActorPlusOpt:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "enabled": ("BOOLEAN", {"default": True, "label_off": "OFF", "label_on": "ON"}),
+ "input_image": ("IMAGE",),
+ "swap_model": (list(model_names().keys()),),
+ "facedetection": (["retinaface_resnet50", "retinaface_mobile0.25", "YOLOv5l", "YOLOv5n"],),
+ "face_restore_model": (get_model_names(get_restorers),),
+ "face_restore_visibility": ("FLOAT", {"default": 1, "min": 0.1, "max": 1, "step": 0.05}),
+ "codeformer_weight": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.05}),
+ },
+ "optional": {
+ "source_image": ("IMAGE",),
+ "face_model": ("FACE_MODEL",),
+ "options": ("OPTIONS",),
+ "face_boost": ("FACE_BOOST",),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE","FACE_MODEL")
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def __init__(self):
+ # self.face_helper = None
+ self.faces_order = ["large-small", "large-small"]
+ self.detect_gender_input = "no"
+ self.detect_gender_source = "no"
+ self.input_faces_index = "0"
+ self.source_faces_index = "0"
+ self.console_log_level = 1
+ # self.face_size = 512
+ self.face_boost_enabled = False
+ self.restore = True
+ self.boost_model = None
+ self.interpolation = "Bicubic"
+ self.boost_model_visibility = 1
+ self.boost_cf_weight = 0.5
+
+ def execute(self, enabled, input_image, swap_model, facedetection, face_restore_model, face_restore_visibility, codeformer_weight, source_image=None, face_model=None, options=None, face_boost=None):
+
+ if options is not None:
+ self.faces_order = [options["input_faces_order"], options["source_faces_order"]]
+ self.console_log_level = options["console_log_level"]
+ self.detect_gender_input = options["detect_gender_input"]
+ self.detect_gender_source = options["detect_gender_source"]
+ self.input_faces_index = options["input_faces_index"]
+ self.source_faces_index = options["source_faces_index"]
+
+ if face_boost is not None:
+ self.face_boost_enabled = face_boost["enabled"]
+ self.restore = face_boost["restore_with_main_after"]
+ else:
+ self.face_boost_enabled = False
+
+ result = reactor.execute(
+ self,enabled,input_image,swap_model,self.detect_gender_source,self.detect_gender_input,self.source_faces_index,self.input_faces_index,self.console_log_level,face_restore_model,face_restore_visibility,codeformer_weight,facedetection,source_image,face_model,self.faces_order, face_boost=face_boost
+ )
+
+ return result
+
+
+class LoadFaceModel:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "face_model": (get_model_names(get_facemodels),),
+ }
+ }
+
+ RETURN_TYPES = ("FACE_MODEL",)
+ FUNCTION = "load_model"
+ CATEGORY = "🌌 ReActor"
+
+ def load_model(self, face_model):
+ self.face_model = face_model
+ self.face_models_path = FACE_MODELS_PATH
+ if self.face_model != "none":
+ face_model_path = os.path.join(self.face_models_path, self.face_model)
+ out = load_face_model(face_model_path)
+ else:
+ out = None
+ return (out, )
+
+
+class BuildFaceModel:
+ def __init__(self):
+ self.output_dir = FACE_MODELS_PATH
+
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "save_mode": ("BOOLEAN", {"default": True, "label_off": "OFF", "label_on": "ON"}),
+ "send_only": ("BOOLEAN", {"default": False, "label_off": "NO", "label_on": "YES"}),
+ "face_model_name": ("STRING", {"default": "default"}),
+ "compute_method": (["Mean", "Median", "Mode"], {"default": "Mean"}),
+ },
+ "optional": {
+ "images": ("IMAGE",),
+ "face_models": ("FACE_MODEL",),
+ }
+ }
+
+ RETURN_TYPES = ("FACE_MODEL",)
+ FUNCTION = "blend_faces"
+
+ OUTPUT_NODE = True
+
+ CATEGORY = "🌌 ReActor"
+
+ def build_face_model(self, image: Image.Image, det_size=(640, 640)):
+ logging.StreamHandler.terminator = "\n"
+ if image is None:
+ error_msg = "Please load an Image"
+ logger.error(error_msg)
+ return error_msg
+ image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+ face_model = analyze_faces(image, det_size)
+
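+        # nothing detected at full size: retry with a halved detection window, which typically helps when the face fills most of the frame;
+        # the bare print() calls only keep the single-line progress output readable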
+ if len(face_model) == 0:
+ print("")
+ det_size_half = half_det_size(det_size)
+ face_model = analyze_faces(image, det_size_half)
+ if face_model is not None and len(face_model) > 0:
+ print("...........................................................", end=" ")
+
+ if face_model is not None and len(face_model) > 0:
+ return face_model[0]
+ else:
+ no_face_msg = "No face found, please try another image"
+ # logger.error(no_face_msg)
+ return no_face_msg
+
+ def blend_faces(self, save_mode, send_only, face_model_name, compute_method, images=None, face_models=None):
+ global BLENDED_FACE_MODEL
+ blended_face: Face = BLENDED_FACE_MODEL
+
+ if send_only and blended_face is None:
+ send_only = False
+
+ if (images is not None or face_models is not None) and not send_only:
+
+ faces = []
+ embeddings = []
+
+ apply_patch(1)
+
+ if images is not None:
+ images_list: List[Image.Image] = batch_tensor_to_pil(images)
+
+ n = len(images_list)
+
+ for i,image in enumerate(images_list):
+ logging.StreamHandler.terminator = " "
+ logger.status(f"Building Face Model {i+1} of {n}...")
+ face = self.build_face_model(image)
+ if isinstance(face, str):
+ logger.error(f"No faces found in image {i+1}, skipping")
+ continue
+ else:
+ print(f"{int(((i+1)/n)*100)}%")
+ faces.append(face)
+ embeddings.append(face.embedding)
+
+ elif face_models is not None:
+
+ n = len(face_models)
+
+ for i,face_model in enumerate(face_models):
+ logging.StreamHandler.terminator = " "
+ logger.status(f"Extracting Face Model {i+1} of {n}...")
+ face = face_model
+ if isinstance(face, str):
+ logger.error(f"No faces found for face_model {i+1}, skipping")
+ continue
+ else:
+ print(f"{int(((i+1)/n)*100)}%")
+ faces.append(face)
+ embeddings.append(face.embedding)
+
+ logging.StreamHandler.terminator = "\n"
+ if len(faces) > 0:
+ # compute_method_name = "Mean" if compute_method == 0 else "Median" if compute_method == 1 else "Mode"
+ logger.status(f"Blending with Compute Method '{compute_method}'...")
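+                # blend by combining the collected embeddings element-wise (mean/median via numpy, mode via scipy.stats);
+                # bbox, landmarks and the other attributes are copied from the first detected face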
+ blended_embedding = np.mean(embeddings, axis=0) if compute_method == "Mean" else np.median(embeddings, axis=0) if compute_method == "Median" else stats.mode(embeddings, axis=0)[0].astype(np.float32)
+ blended_face = Face(
+ bbox=faces[0].bbox,
+ kps=faces[0].kps,
+ det_score=faces[0].det_score,
+ landmark_3d_68=faces[0].landmark_3d_68,
+ pose=faces[0].pose,
+ landmark_2d_106=faces[0].landmark_2d_106,
+ embedding=blended_embedding,
+ gender=faces[0].gender,
+ age=faces[0].age
+ )
+ if blended_face is not None:
+ BLENDED_FACE_MODEL = blended_face
+ if save_mode:
+ face_model_path = os.path.join(FACE_MODELS_PATH, face_model_name + ".safetensors")
+ save_face_model(blended_face,face_model_path)
+ # done_msg = f"Face model has been saved to '{face_model_path}'"
+ # logger.status(done_msg)
+ logger.status("--Done!--")
+ # return (blended_face,)
+ else:
+ no_face_msg = "Something went wrong, please try another set of images"
+ logger.error(no_face_msg)
+ # return (blended_face,)
+ # logger.status("--Done!--")
+ if images is None and face_models is None:
+ logger.error("Please provide `images` or `face_models`")
+ return (blended_face,)
+
+
+class SaveFaceModel:
+ def __init__(self):
+ self.output_dir = FACE_MODELS_PATH
+
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "save_mode": ("BOOLEAN", {"default": True, "label_off": "OFF", "label_on": "ON"}),
+ "face_model_name": ("STRING", {"default": "default"}),
+ "select_face_index": ("INT", {"default": 0, "min": 0}),
+ },
+ "optional": {
+ "image": ("IMAGE",),
+ "face_model": ("FACE_MODEL",),
+ }
+ }
+
+ RETURN_TYPES = ()
+ FUNCTION = "save_model"
+
+ OUTPUT_NODE = True
+
+ CATEGORY = "🌌 ReActor"
+
+ def save_model(self, save_mode, face_model_name, select_face_index, image=None, face_model=None, det_size=(640, 640)):
+ if save_mode and image is not None:
+ source = tensor_to_pil(image)
+ source = cv2.cvtColor(np.array(source), cv2.COLOR_RGB2BGR)
+ apply_patch(1)
+ logger.status("Building Face Model...")
+ face_model_raw = analyze_faces(source, det_size)
+ if len(face_model_raw) == 0:
+ det_size_half = half_det_size(det_size)
+ face_model_raw = analyze_faces(source, det_size_half)
+ try:
+ face_model = face_model_raw[select_face_index]
+            except Exception:
+ logger.error("No face(s) found")
+ return face_model_name
+ logger.status("--Done!--")
+        if save_mode and face_model is not None and face_model != "none":
+ face_model_path = os.path.join(self.output_dir, face_model_name + ".safetensors")
+ save_face_model(face_model,face_model_path)
+ if image is None and face_model is None:
+ logger.error("Please provide `face_model` or `image`")
+ return face_model_name
+
+
+class RestoreFace:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "facedetection": (["retinaface_resnet50", "retinaface_mobile0.25", "YOLOv5l", "YOLOv5n"],),
+ "model": (get_model_names(get_restorers),),
+ "visibility": ("FLOAT", {"default": 1, "min": 0.0, "max": 1, "step": 0.05}),
+ "codeformer_weight": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.05}),
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ # def __init__(self):
+ # self.face_helper = None
+ # self.face_size = 512
+
+ def execute(self, image, model, visibility, codeformer_weight, facedetection):
+ result = reactor.restore_face(self,image,model,visibility,codeformer_weight,facedetection)
+ return (result,)
+
+
+class MaskHelper:
+ def __init__(self):
+ # self.threshold = 0.5
+ # self.dilation = 10
+ # self.crop_factor = 3.0
+ # self.drop_size = 1
+ self.labels = "all"
+ self.detailer_hook = None
+ self.device_mode = "AUTO"
+ self.detection_hint = "center-1"
+ # self.sam_dilation = 0
+ # self.sam_threshold = 0.93
+ # self.bbox_expansion = 0
+ # self.mask_hint_threshold = 0.7
+ # self.mask_hint_use_negative = "False"
+ # self.force_resize_width = 0
+ # self.force_resize_height = 0
+ # self.resize_behavior = "source_size"
+
+ @classmethod
+ def INPUT_TYPES(s):
+ bboxs = ["bbox/"+x for x in folder_paths.get_filename_list("ultralytics_bbox")]
+ segms = ["segm/"+x for x in folder_paths.get_filename_list("ultralytics_segm")]
+ sam_models = [x for x in folder_paths.get_filename_list("sams") if 'hq' not in x]
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "swapped_image": ("IMAGE",),
+ "bbox_model_name": (bboxs + segms, ),
+ "bbox_threshold": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
+ "bbox_dilation": ("INT", {"default": 10, "min": -512, "max": 512, "step": 1}),
+ "bbox_crop_factor": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 100, "step": 0.1}),
+ "bbox_drop_size": ("INT", {"min": 1, "max": 8192, "step": 1, "default": 10}),
+ "sam_model_name": (sam_models, ),
+ "sam_dilation": ("INT", {"default": 0, "min": -512, "max": 512, "step": 1}),
+ "sam_threshold": ("FLOAT", {"default": 0.93, "min": 0.0, "max": 1.0, "step": 0.01}),
+ "bbox_expansion": ("INT", {"default": 0, "min": 0, "max": 1000, "step": 1}),
+ "mask_hint_threshold": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.01}),
+ "mask_hint_use_negative": (["False", "Small", "Outter"], ),
+ "morphology_operation": (["dilate", "erode", "open", "close"],),
+ "morphology_distance": ("INT", {"default": 0, "min": 0, "max": 128, "step": 1}),
+ "blur_radius": ("INT", {"default": 9, "min": 0, "max": 48, "step": 1}),
+ "sigma_factor": ("FLOAT", {"default": 1.0, "min": 0.01, "max": 3., "step": 0.01}),
+ },
+ "optional": {
+ "mask_optional": ("MASK",),
+ }
+ }
+
+ RETURN_TYPES = ("IMAGE","MASK","IMAGE","IMAGE")
+ RETURN_NAMES = ("IMAGE","MASK","MASK_PREVIEW","SWAPPED_FACE")
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self, image, swapped_image, bbox_model_name, bbox_threshold, bbox_dilation, bbox_crop_factor, bbox_drop_size, sam_model_name, sam_dilation, sam_threshold, bbox_expansion, mask_hint_threshold, mask_hint_use_negative, morphology_operation, morphology_distance, blur_radius, sigma_factor, mask_optional=None):
+
+ # images = [image[i:i + 1, ...] for i in range(image.shape[0])]
+
+ images = image
+
+ if mask_optional is None:
+
+ bbox_model_path = folder_paths.get_full_path("ultralytics", bbox_model_name)
+ bbox_model = subcore.load_yolo(bbox_model_path)
+ bbox_detector = subcore.UltraBBoxDetector(bbox_model)
+
+ segs = bbox_detector.detect(images, bbox_threshold, bbox_dilation, bbox_crop_factor, bbox_drop_size, self.detailer_hook)
+
+ if isinstance(self.labels, list):
+ self.labels = str(self.labels[0])
+
+ if self.labels is not None and self.labels != '':
+ self.labels = self.labels.split(',')
+ if len(self.labels) > 0:
+ segs, _ = masking_segs.filter(segs, self.labels)
+ # segs, _ = masking_segs.filter(segs, "all")
+
+ sam_modelname = folder_paths.get_full_path("sams", sam_model_name)
+
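+            # infer the SAM backbone variant from the checkpoint filename, falling back to vit_b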
+ if 'vit_h' in sam_model_name:
+ model_kind = 'vit_h'
+ elif 'vit_l' in sam_model_name:
+ model_kind = 'vit_l'
+ else:
+ model_kind = 'vit_b'
+
+ sam = sam_model_registry[model_kind](checkpoint=sam_modelname)
+ size = os.path.getsize(sam_modelname)
+ sam.safe_to = core.SafeToGPU(size)
+
+ device = model_management.get_torch_device()
+
+ sam.safe_to.to_device(sam, device)
+
+ sam.is_auto_mode = self.device_mode == "AUTO"
+
+ combined_mask, _ = core.make_sam_mask_segmented(sam, segs, images, self.detection_hint, sam_dilation, sam_threshold, bbox_expansion, mask_hint_threshold, mask_hint_use_negative)
+
+ else:
+ combined_mask = mask_optional
+
+ # *** MASK TO IMAGE ***:
+
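+        # broadcast the single-channel mask to an image-like tensor of shape (B, H, W, 3) for the morphology and blur steps below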
+ mask_image = combined_mask.reshape((-1, 1, combined_mask.shape[-2], combined_mask.shape[-1])).movedim(1, -1).expand(-1, -1, -1, 3)
+
+ # *** MASK MORPH ***:
+
+ mask_image = core.tensor2mask(mask_image)
+
+ if morphology_operation == "dilate":
+ mask_image = self.dilate(mask_image, morphology_distance)
+ elif morphology_operation == "erode":
+ mask_image = self.erode(mask_image, morphology_distance)
+ elif morphology_operation == "open":
+ mask_image = self.erode(mask_image, morphology_distance)
+ mask_image = self.dilate(mask_image, morphology_distance)
+ elif morphology_operation == "close":
+ mask_image = self.dilate(mask_image, morphology_distance)
+ mask_image = self.erode(mask_image, morphology_distance)
+
+ # *** MASK BLUR ***:
+
+ if len(mask_image.size()) == 3:
+ mask_image = mask_image.unsqueeze(3)
+
+ mask_image = mask_image.permute(0, 3, 1, 2)
+ kernel_size = blur_radius * 2 + 1
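+        # sigma scales linearly with the blur radius; sigma_factor lets the user tune how soft the mask edge becomes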
+ sigma = sigma_factor * (0.6 * blur_radius - 0.3)
+ mask_image_final = self.gaussian_blur(mask_image, kernel_size, sigma).permute(0, 2, 3, 1)
+ if mask_image_final.size()[3] == 1:
+ mask_image_final = mask_image_final[:, :, :, 0]
+
+ # *** CUT BY MASK ***:
+
+ if len(swapped_image.shape) < 4:
+ C = 1
+ else:
+ C = swapped_image.shape[3]
+
+ # We operate on RGBA to keep the code clean and then convert back after
+ swapped_image = core.tensor2rgba(swapped_image)
+ mask = core.tensor2mask(mask_image_final)
+
+ # Scale the mask to be a matching size if it isn't
+ B, H, W, _ = swapped_image.shape
+ mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(H, W), mode='nearest')[:,0,:,:]
+ MB, _, _ = mask.shape
+
+ if MB < B:
+ assert(B % MB == 0)
+ mask = mask.repeat(B // MB, 1, 1)
+
+ # masks_to_boxes errors if the tensor is all zeros, so we'll add a single pixel and zero it out at the end
+ is_empty = ~torch.gt(torch.max(torch.reshape(mask,[MB, H * W]), dim=1).values, 0.)
+ mask[is_empty,0,0] = 1.
+ boxes = masks_to_boxes(mask)
+ mask[is_empty,0,0] = 0.
+
+ min_x = boxes[:,0]
+ min_y = boxes[:,1]
+ max_x = boxes[:,2]
+ max_y = boxes[:,3]
+
+ width = max_x - min_x + 1
+ height = max_y - min_y + 1
+
+ use_width = int(torch.max(width).item())
+ use_height = int(torch.max(height).item())
+
+ # if self.force_resize_width > 0:
+ # use_width = self.force_resize_width
+
+ # if self.force_resize_height > 0:
+ # use_height = self.force_resize_height
+
+ alpha_mask = torch.ones((B, H, W, 4))
+ alpha_mask[:,:,:,3] = mask
+
+ swapped_image = swapped_image * alpha_mask
+
+ cutted_image = torch.zeros((B, use_height, use_width, 4))
+ for i in range(0, B):
+ if not is_empty[i]:
+ ymin = int(min_y[i].item())
+ ymax = int(max_y[i].item())
+ xmin = int(min_x[i].item())
+ xmax = int(max_x[i].item())
+ single = (swapped_image[i, ymin:ymax+1, xmin:xmax+1,:]).unsqueeze(0)
+ resized = torch.nn.functional.interpolate(single.permute(0, 3, 1, 2), size=(use_height, use_width), mode='bicubic').permute(0, 2, 3, 1)
+ cutted_image[i] = resized[0]
+
+ # Preserve our type unless we were previously RGB and added non-opaque alpha due to the mask size
+ if C == 1:
+ cutted_image = core.tensor2mask(cutted_image)
+ elif C == 3 and torch.min(cutted_image[:,:,:,3]) == 1:
+ cutted_image = core.tensor2rgb(cutted_image)
+
+ # *** PASTE BY MASK ***:
+
+ image_base = core.tensor2rgba(images)
+ image_to_paste = core.tensor2rgba(cutted_image)
+ mask = core.tensor2mask(mask_image_final)
+
+ # Scale the mask to be a matching size if it isn't
+ B, H, W, C = image_base.shape
+ MB = mask.shape[0]
+ PB = image_to_paste.shape[0]
+
+ if B < PB:
+ assert(PB % B == 0)
+ image_base = image_base.repeat(PB // B, 1, 1, 1)
+ B, H, W, C = image_base.shape
+ if MB < B:
+ assert(B % MB == 0)
+ mask = mask.repeat(B // MB, 1, 1)
+ elif B < MB:
+ assert(MB % B == 0)
+ image_base = image_base.repeat(MB // B, 1, 1, 1)
+ if PB < B:
+ assert(B % PB == 0)
+ image_to_paste = image_to_paste.repeat(B // PB, 1, 1, 1)
+
+ mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(H, W), mode='nearest')[:,0,:,:]
+ MB, MH, MW = mask.shape
+
+ # masks_to_boxes errors if the tensor is all zeros, so we'll add a single pixel and zero it out at the end
+ is_empty = ~torch.gt(torch.max(torch.reshape(mask,[MB, MH * MW]), dim=1).values, 0.)
+ mask[is_empty,0,0] = 1.
+ boxes = masks_to_boxes(mask)
+ mask[is_empty,0,0] = 0.
+
+ min_x = boxes[:,0]
+ min_y = boxes[:,1]
+ max_x = boxes[:,2]
+ max_y = boxes[:,3]
+ mid_x = (min_x + max_x) / 2
+ mid_y = (min_y + max_y) / 2
+
+ target_width = max_x - min_x + 1
+ target_height = max_y - min_y + 1
+
+ result = image_base.detach().clone()
+ face_segment = mask_image_final
+
+ for i in range(0, MB):
+ if is_empty[i]:
+ continue
+ else:
+ image_index = i
+ source_size = image_to_paste.size()
+ SB, SH, SW, _ = image_to_paste.shape
+
+ # Figure out the desired size
+ width = int(target_width[i].item())
+ height = int(target_height[i].item())
+ # if self.resize_behavior == "keep_ratio_fill":
+ # target_ratio = width / height
+ # actual_ratio = SW / SH
+ # if actual_ratio > target_ratio:
+ # width = int(height * actual_ratio)
+ # elif actual_ratio < target_ratio:
+ # height = int(width / actual_ratio)
+ # elif self.resize_behavior == "keep_ratio_fit":
+ # target_ratio = width / height
+ # actual_ratio = SW / SH
+ # if actual_ratio > target_ratio:
+ # height = int(width / actual_ratio)
+ # elif actual_ratio < target_ratio:
+ # width = int(height * actual_ratio)
+ # elif self.resize_behavior == "source_size" or self.resize_behavior == "source_size_unmasked":
+
+ width = SW
+ height = SH
+
+ # Resize the image we're pasting if needed
+ resized_image = image_to_paste[i].unsqueeze(0)
+ # if SH != height or SW != width:
+ # resized_image = torch.nn.functional.interpolate(resized_image.permute(0, 3, 1, 2), size=(height,width), mode='bicubic').permute(0, 2, 3, 1)
+
+ pasting = torch.ones([H, W, C])
+ ymid = float(mid_y[i].item())
+ ymin = int(math.floor(ymid - height / 2)) + 1
+ ymax = int(math.floor(ymid + height / 2)) + 1
+ xmid = float(mid_x[i].item())
+ xmin = int(math.floor(xmid - width / 2)) + 1
+ xmax = int(math.floor(xmid + width / 2)) + 1
+
+ _, source_ymax, source_xmax, _ = resized_image.shape
+ source_ymin, source_xmin = 0, 0
+
+ if xmin < 0:
+ source_xmin = abs(xmin)
+ xmin = 0
+ if ymin < 0:
+ source_ymin = abs(ymin)
+ ymin = 0
+ if xmax > W:
+ source_xmax -= (xmax - W)
+ xmax = W
+ if ymax > H:
+ source_ymax -= (ymax - H)
+ ymax = H
+
+ pasting[ymin:ymax, xmin:xmax, :] = resized_image[0, source_ymin:source_ymax, source_xmin:source_xmax, :]
+ pasting[:, :, 3] = 1.
+
+ pasting_alpha = torch.zeros([H, W])
+ pasting_alpha[ymin:ymax, xmin:xmax] = resized_image[0, source_ymin:source_ymax, source_xmin:source_xmax, 3]
+
+ # if self.resize_behavior == "keep_ratio_fill" or self.resize_behavior == "source_size_unmasked":
+ # # If we explicitly want to fill the area, we are ok with extending outside
+ # paste_mask = pasting_alpha.unsqueeze(2).repeat(1, 1, 4)
+ # else:
+ # paste_mask = torch.min(pasting_alpha, mask[i]).unsqueeze(2).repeat(1, 1, 4)
+ paste_mask = torch.min(pasting_alpha, mask[i]).unsqueeze(2).repeat(1, 1, 4)
+ result[image_index] = pasting * paste_mask + result[image_index] * (1. - paste_mask)
+
+ face_segment = result
+
+ face_segment[...,3] = mask[i]
+
+ result = rgba2rgb_tensor(result)
+
+ return (result,combined_mask,mask_image_final,face_segment,)
+
+ def gaussian_blur(self, image, kernel_size, sigma):
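+        # builds a normalized 2D Gaussian kernel, reflect-pads the input, and applies the kernel to every channel
+        # by folding the channels into the batch dimension of a single conv2d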
+ kernel = torch.Tensor(kernel_size, kernel_size).to(device=image.device)
+ center = kernel_size // 2
+ variance = sigma**2
+ for i in range(kernel_size):
+ for j in range(kernel_size):
+ x = i - center
+ y = j - center
+ kernel[i, j] = math.exp(-(x**2 + y**2)/(2*variance))
+ kernel /= kernel.sum()
+
+ # Pad the input tensor
+ padding = (kernel_size - 1) // 2
+ input_pad = torch.nn.functional.pad(image, (padding, padding, padding, padding), mode='reflect')
+
+ # Reshape the padded input tensor for batched convolution
+ batch_size, num_channels, height, width = image.shape
+ input_reshaped = input_pad.reshape(batch_size*num_channels, 1, height+padding*2, width+padding*2)
+
+ # Perform batched convolution with the Gaussian kernel
+ output_reshaped = torch.nn.functional.conv2d(input_reshaped, kernel.unsqueeze(0).unsqueeze(0))
+
+ # Reshape the output tensor to its original shape
+ output_tensor = output_reshaped.reshape(batch_size, num_channels, height, width)
+
+ return output_tensor
+
+ def erode(self, image, distance):
+ return 1. - self.dilate(1. - image, distance)
+
+ def dilate(self, image, distance):
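+        # stride-1 max-pooling acts as morphological dilation on a soft mask; erode() above is defined as its dual (1 - dilate(1 - x))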
+ kernel_size = 1 + distance * 2
+ # Add the channels dimension
+ image = image.unsqueeze(1)
+ out = torchfn.max_pool2d(image, kernel_size=kernel_size, stride=1, padding=kernel_size // 2).squeeze(1)
+ return out
+
+
+class ImageDublicator:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ "count": ("INT", {"default": 1, "min": 0}),
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ RETURN_NAMES = ("IMAGES",)
+ OUTPUT_IS_LIST = (True,)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self, image, count):
+ images = [image for i in range(count)]
+ return (images,)
+
+
+class ImageRGBA2RGB:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "image": ("IMAGE",),
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self, image):
+ out = rgba2rgb_tensor(image)
+ return (out,)
+
+
+class MakeFaceModelBatch:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "face_model1": ("FACE_MODEL",),
+ },
+ "optional": {
+ "face_model2": ("FACE_MODEL",),
+ "face_model3": ("FACE_MODEL",),
+ "face_model4": ("FACE_MODEL",),
+ "face_model5": ("FACE_MODEL",),
+ "face_model6": ("FACE_MODEL",),
+ "face_model7": ("FACE_MODEL",),
+ "face_model8": ("FACE_MODEL",),
+ "face_model9": ("FACE_MODEL",),
+ "face_model10": ("FACE_MODEL",),
+ },
+ }
+
+ RETURN_TYPES = ("FACE_MODEL",)
+ RETURN_NAMES = ("FACE_MODELS",)
+ FUNCTION = "execute"
+
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self, **kwargs):
+ if len(kwargs) > 0:
+ face_models = [value for value in kwargs.values()]
+ return (face_models,)
+ else:
+ logger.error("Please provide at least 1 `face_model`")
+ return (None,)
+
+
+class ReActorOptions:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "input_faces_order": (
+ ["left-right","right-left","top-bottom","bottom-top","small-large","large-small"], {"default": "large-small"}
+ ),
+ "input_faces_index": ("STRING", {"default": "0"}),
+ "detect_gender_input": (["no","female","male"], {"default": "no"}),
+ "source_faces_order": (
+ ["left-right","right-left","top-bottom","bottom-top","small-large","large-small"], {"default": "large-small"}
+ ),
+ "source_faces_index": ("STRING", {"default": "0"}),
+ "detect_gender_source": (["no","female","male"], {"default": "no"}),
+ "console_log_level": ([0, 1, 2], {"default": 1}),
+ }
+ }
+
+ RETURN_TYPES = ("OPTIONS",)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self,input_faces_order, input_faces_index, detect_gender_input, source_faces_order, source_faces_index, detect_gender_source, console_log_level):
+ options: dict = {
+ "input_faces_order": input_faces_order,
+ "input_faces_index": input_faces_index,
+ "detect_gender_input": detect_gender_input,
+ "source_faces_order": source_faces_order,
+ "source_faces_index": source_faces_index,
+ "detect_gender_source": detect_gender_source,
+ "console_log_level": console_log_level,
+ }
+ return (options, )
+
+
+class ReActorFaceBoost:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "enabled": ("BOOLEAN", {"default": True, "label_off": "OFF", "label_on": "ON"}),
+ "boost_model": (get_model_names(get_restorers),),
+ "interpolation": (["Nearest","Bilinear","Bicubic","Lanczos"], {"default": "Bicubic"}),
+ "visibility": ("FLOAT", {"default": 1, "min": 0.1, "max": 1, "step": 0.05}),
+ "codeformer_weight": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.05}),
+ "restore_with_main_after": ("BOOLEAN", {"default": False}),
+ }
+ }
+
+ RETURN_TYPES = ("FACE_BOOST",)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self,enabled,boost_model,interpolation,visibility,codeformer_weight,restore_with_main_after):
+ face_boost: dict = {
+ "enabled": enabled,
+ "boost_model": boost_model,
+ "interpolation": interpolation,
+ "visibility": visibility,
+ "codeformer_weight": codeformer_weight,
+ "restore_with_main_after": restore_with_main_after,
+ }
+ return (face_boost, )
+
+class ReActorUnload:
+ @classmethod
+ def INPUT_TYPES(s):
+ return {
+ "required": {
+ "trigger": ("IMAGE", ),
+ },
+ }
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "execute"
+ CATEGORY = "🌌 ReActor"
+
+ def execute(self, trigger):
+ unload_all_models()
+ return (trigger,)
+
+
+NODE_CLASS_MAPPINGS = {
+ # --- MAIN NODES ---
+ "ReActorFaceSwap": reactor,
+ "ReActorFaceSwapOpt": ReActorPlusOpt,
+ "ReActorOptions": ReActorOptions,
+ "ReActorFaceBoost": ReActorFaceBoost,
+ "ReActorMaskHelper": MaskHelper,
+ # --- Operations with Face Models ---
+ "ReActorSaveFaceModel": SaveFaceModel,
+ "ReActorLoadFaceModel": LoadFaceModel,
+ "ReActorBuildFaceModel": BuildFaceModel,
+ "ReActorMakeFaceModelBatch": MakeFaceModelBatch,
+ # --- Additional Nodes ---
+ "ReActorRestoreFace": RestoreFace,
+ "ReActorImageDublicator": ImageDublicator,
+ "ImageRGBA2RGB": ImageRGBA2RGB,
+ "ReActorUnload": ReActorUnload,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+ # --- MAIN NODES ---
+ "ReActorFaceSwap": "ReActor 🌌 Fast Face Swap",
+ "ReActorFaceSwapOpt": "ReActor 🌌 Fast Face Swap [OPTIONS]",
+ "ReActorOptions": "ReActor 🌌 Options",
+ "ReActorFaceBoost": "ReActor 🌌 Face Booster",
+ "ReActorMaskHelper": "ReActor 🌌 Masking Helper",
+ # --- Operations with Face Models ---
+ "ReActorSaveFaceModel": "Save Face Model 🌌 ReActor",
+ "ReActorLoadFaceModel": "Load Face Model 🌌 ReActor",
+ "ReActorBuildFaceModel": "Build Blended Face Model 🌌 ReActor",
+ "ReActorMakeFaceModelBatch": "Make Face Model Batch 🌌 ReActor",
+ # --- Additional Nodes ---
+ "ReActorRestoreFace": "Restore Face 🌌 ReActor",
+ "ReActorImageDublicator": "Image Dublicator (List) 🌌 ReActor",
+ "ImageRGBA2RGB": "Convert RGBA to RGB 🌌 ReActor",
+ "ReActorUnload": "Unload ReActor Models 🌌 ReActor",
+}
diff --git a/pyproject.toml.off b/pyproject.toml.off
new file mode 100644
index 0000000..819e929
--- /dev/null
+++ b/pyproject.toml.off
@@ -0,0 +1,15 @@
+[project]
+name = "comfyui-reactor-node"
+description = "The Fast and Simple Face Swap Extension Node for ComfyUI, based on ReActor SD-WebUI Face Swap Extension"
+version = "0.5.2-a2"
+license = { file = "LICENSE" }
+dependencies = ["insightface==0.7.3", "onnx>=1.14.0", "opencv-python>=4.7.0.72", "numpy==1.26.3", "segment_anything", "albumentations>=1.4.16", "ultralytics"]
+
+[project.urls]
+Repository = "https://github.com/Gourieff/ComfyUI-ReActor"
+# Used by Comfy Registry https://comfyregistry.org
+
+[tool.comfy]
+PublisherId = "gourieff"
+DisplayName = "comfyui-reactor-node"
+Icon = ""
diff --git a/r_chainner/archs/face/gfpganv1_clean_arch.py b/r_chainner/archs/face/gfpganv1_clean_arch.py
new file mode 100644
index 0000000..f3c4d49
--- /dev/null
+++ b/r_chainner/archs/face/gfpganv1_clean_arch.py
@@ -0,0 +1,370 @@
+# pylint: skip-file
+# type: ignore
+import math
+import random
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from r_chainner.archs.face.stylegan2_clean_arch import StyleGAN2GeneratorClean
+
+
+class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean):
+ """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
+ It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
+ Args:
+ out_size (int): The spatial size of outputs.
+ num_style_feat (int): Channel number of style features. Default: 512.
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+ narrow (float): The narrow ratio for channels. Default: 1.
+ sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+ """
+
+ def __init__(
+ self,
+ out_size,
+ num_style_feat=512,
+ num_mlp=8,
+ channel_multiplier=2,
+ narrow=1,
+ sft_half=False,
+ ):
+ super(StyleGAN2GeneratorCSFT, self).__init__(
+ out_size,
+ num_style_feat=num_style_feat,
+ num_mlp=num_mlp,
+ channel_multiplier=channel_multiplier,
+ narrow=narrow,
+ )
+ self.sft_half = sft_half
+
+ def forward(
+ self,
+ styles,
+ conditions,
+ input_is_latent=False,
+ noise=None,
+ randomize_noise=True,
+ truncation=1,
+ truncation_latent=None,
+ inject_index=None,
+ return_latents=False,
+ ):
+ """Forward function for StyleGAN2GeneratorCSFT.
+ Args:
+ styles (list[Tensor]): Sample codes of styles.
+ conditions (list[Tensor]): SFT conditions to generators.
+ input_is_latent (bool): Whether input is latent style. Default: False.
+ noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is None. Default: True.
+ truncation (float): The truncation ratio. Default: 1.
+ truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
+ inject_index (int | None): The injection index for mixing noise. Default: None.
+ return_latents (bool): Whether to return style latents. Default: False.
+ """
+ # style codes -> latents with Style MLP layer
+ if not input_is_latent:
+ styles = [self.style_mlp(s) for s in styles]
+ # noises
+ if noise is None:
+ if randomize_noise:
+ noise = [None] * self.num_layers # for each style conv layer
+ else: # use the stored noise
+ noise = [
+ getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+ ]
+ # style truncation
+ if truncation < 1:
+ style_truncation = []
+ for style in styles:
+ style_truncation.append(
+ truncation_latent + truncation * (style - truncation_latent)
+ )
+ styles = style_truncation
+ # get style latents with injection
+ if len(styles) == 1:
+ inject_index = self.num_latent
+
+ if styles[0].ndim < 3:
+ # repeat latent code for all the layers
+ latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+ else: # used for encoder with different latent code for each layer
+ latent = styles[0]
+ elif len(styles) == 2: # mixing noises
+ if inject_index is None:
+ inject_index = random.randint(1, self.num_latent - 1)
+ latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+ latent2 = (
+ styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+ )
+ latent = torch.cat([latent1, latent2], 1)
+
+ # main generation
+ out = self.constant_input(latent.shape[0])
+ out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+ skip = self.to_rgb1(out, latent[:, 1])
+
+ i = 1
+ for conv1, conv2, noise1, noise2, to_rgb in zip(
+ self.style_convs[::2],
+ self.style_convs[1::2],
+ noise[1::2],
+ noise[2::2],
+ self.to_rgbs,
+ ):
+ out = conv1(out, latent[:, i], noise=noise1)
+
+ # the conditions may have fewer levels
+ if i < len(conditions):
+ # SFT part to combine the conditions
+ if self.sft_half: # only apply SFT to half of the channels
+ out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
+ out_sft = out_sft * conditions[i - 1] + conditions[i]
+ out = torch.cat([out_same, out_sft], dim=1)
+ else: # apply SFT to all the channels
+ out = out * conditions[i - 1] + conditions[i]
+
+ out = conv2(out, latent[:, i + 1], noise=noise2)
+ skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space
+ i += 2
+
+ image = skip
+
+ if return_latents:
+ return image, latent
+ else:
+ return image, None
+
+
+class ResBlock(nn.Module):
+ """Residual block with bilinear upsampling/downsampling.
+ Args:
+ in_channels (int): Channel number of the input.
+ out_channels (int): Channel number of the output.
+ mode (str): Upsampling/downsampling mode. Options: down | up. Default: down.
+ """
+
+ def __init__(self, in_channels, out_channels, mode="down"):
+ super(ResBlock, self).__init__()
+
+ self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
+ self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
+ self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False)
+ if mode == "down":
+ self.scale_factor = 0.5
+ elif mode == "up":
+ self.scale_factor = 2
+
+ def forward(self, x):
+ out = F.leaky_relu_(self.conv1(x), negative_slope=0.2)
+ # upsample/downsample
+ out = F.interpolate(
+ out, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+ )
+ out = F.leaky_relu_(self.conv2(out), negative_slope=0.2)
+ # skip
+ x = F.interpolate(
+ x, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+ )
+ skip = self.skip(x)
+ out = out + skip
+ return out
+
+
+class GFPGANv1Clean(nn.Module):
+ """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
+ It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
+ Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
+ Args:
+ out_size (int): The spatial size of outputs.
+ num_style_feat (int): Channel number of style features. Default: 512.
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+ decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None.
+ fix_decoder (bool): Whether to fix the decoder. Default: True.
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
+ input_is_latent (bool): Whether input is latent style. Default: False.
+ different_w (bool): Whether to use different latent w for different layers. Default: False.
+ narrow (float): The narrow ratio for channels. Default: 1.
+ sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
+ """
+
+ def __init__(
+ self,
+ state_dict,
+ ):
+ super(GFPGANv1Clean, self).__init__()
+
+ out_size = 512
+ num_style_feat = 512
+ channel_multiplier = 2
+ decoder_load_path = None
+ fix_decoder = False
+ num_mlp = 8
+ input_is_latent = True
+ different_w = True
+ narrow = 1
+ sft_half = True
+
+ self.model_arch = "GFPGAN"
+ self.sub_type = "Face SR"
+ self.scale = 8
+ self.in_nc = 3
+ self.out_nc = 3
+ self.state = state_dict
+
+ self.supports_fp16 = False
+ self.supports_bf16 = True
+ self.min_size_restriction = 512
+
+ self.input_is_latent = input_is_latent
+ self.different_w = different_w
+ self.num_style_feat = num_style_feat
+
+ unet_narrow = narrow * 0.5 # by default, use a half of input channels
+ channels = {
+ "4": int(512 * unet_narrow),
+ "8": int(512 * unet_narrow),
+ "16": int(512 * unet_narrow),
+ "32": int(512 * unet_narrow),
+ "64": int(256 * channel_multiplier * unet_narrow),
+ "128": int(128 * channel_multiplier * unet_narrow),
+ "256": int(64 * channel_multiplier * unet_narrow),
+ "512": int(32 * channel_multiplier * unet_narrow),
+ "1024": int(16 * channel_multiplier * unet_narrow),
+ }
+
+ self.log_size = int(math.log(out_size, 2))
+ first_out_size = 2 ** (int(math.log(out_size, 2)))
+
+ self.conv_body_first = nn.Conv2d(3, channels[f"{first_out_size}"], 1)
+
+ # downsample
+ in_channels = channels[f"{first_out_size}"]
+ self.conv_body_down = nn.ModuleList()
+ for i in range(self.log_size, 2, -1):
+ out_channels = channels[f"{2**(i - 1)}"]
+ self.conv_body_down.append(ResBlock(in_channels, out_channels, mode="down"))
+ in_channels = out_channels
+
+ self.final_conv = nn.Conv2d(in_channels, channels["4"], 3, 1, 1)
+
+ # upsample
+ in_channels = channels["4"]
+ self.conv_body_up = nn.ModuleList()
+ for i in range(3, self.log_size + 1):
+ out_channels = channels[f"{2**i}"]
+ self.conv_body_up.append(ResBlock(in_channels, out_channels, mode="up"))
+ in_channels = out_channels
+
+ # to RGB
+ self.toRGB = nn.ModuleList()
+ for i in range(3, self.log_size + 1):
+ self.toRGB.append(nn.Conv2d(channels[f"{2**i}"], 3, 1))
+
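+        # with different_w the encoder predicts a separate w vector for each StyleGAN layer (2*log2(out_size) - 2 of them); otherwise a single shared w is used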
+ if different_w:
+ linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
+ else:
+ linear_out_channel = num_style_feat
+
+ self.final_linear = nn.Linear(channels["4"] * 4 * 4, linear_out_channel)
+
+ # the decoder: stylegan2 generator with SFT modulations
+ self.stylegan_decoder = StyleGAN2GeneratorCSFT(
+ out_size=out_size,
+ num_style_feat=num_style_feat,
+ num_mlp=num_mlp,
+ channel_multiplier=channel_multiplier,
+ narrow=narrow,
+ sft_half=sft_half,
+ )
+
+ # load pre-trained stylegan2 model if necessary
+ if decoder_load_path:
+ self.stylegan_decoder.load_state_dict(
+ torch.load(
+ decoder_load_path, map_location=lambda storage, loc: storage
+ )["params_ema"]
+ )
+ # fix decoder without updating params
+ if fix_decoder:
+ for _, param in self.stylegan_decoder.named_parameters():
+ param.requires_grad = False
+
+ # for SFT modulations (scale and shift)
+ self.condition_scale = nn.ModuleList()
+ self.condition_shift = nn.ModuleList()
+ for i in range(3, self.log_size + 1):
+ out_channels = channels[f"{2**i}"]
+ if sft_half:
+ sft_out_channels = out_channels
+ else:
+ sft_out_channels = out_channels * 2
+ self.condition_scale.append(
+ nn.Sequential(
+ nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+ nn.LeakyReLU(0.2, True),
+ nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
+ )
+ )
+ self.condition_shift.append(
+ nn.Sequential(
+ nn.Conv2d(out_channels, out_channels, 3, 1, 1),
+ nn.LeakyReLU(0.2, True),
+ nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
+ )
+ )
+ self.load_state_dict(state_dict)
+
+ def forward(
+ self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs
+ ):
+ """Forward function for GFPGANv1Clean.
+ Args:
+ x (Tensor): Input images.
+ return_latents (bool): Whether to return style latents. Default: False.
+ return_rgb (bool): Whether return intermediate rgb images. Default: True.
+            randomize_noise (bool): Randomize noise, used when 'noise' is None. Default: True.
+ """
+ conditions = []
+ unet_skips = []
+ out_rgbs = []
+
+ # encoder
+ feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2)
+ for i in range(self.log_size - 2):
+ feat = self.conv_body_down[i](feat)
+ unet_skips.insert(0, feat)
+ feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2)
+
+ # style code
+ style_code = self.final_linear(feat.view(feat.size(0), -1))
+ if self.different_w:
+ style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)
+
+ # decode
+ for i in range(self.log_size - 2):
+ # add unet skip
+ feat = feat + unet_skips[i]
+ # ResUpLayer
+ feat = self.conv_body_up[i](feat)
+ # generate scale and shift for SFT layers
+ scale = self.condition_scale[i](feat)
+ conditions.append(scale.clone())
+ shift = self.condition_shift[i](feat)
+ conditions.append(shift.clone())
+ # generate rgb images
+ if return_rgb:
+ out_rgbs.append(self.toRGB[i](feat))
+
+ # decoder
+ image, _ = self.stylegan_decoder(
+ [style_code],
+ conditions,
+ return_latents=return_latents,
+ input_is_latent=self.input_is_latent,
+ randomize_noise=randomize_noise,
+ )
+
+ return image, out_rgbs
diff --git a/r_chainner/archs/face/stylegan2_clean_arch.py b/r_chainner/archs/face/stylegan2_clean_arch.py
new file mode 100644
index 0000000..a68655a
--- /dev/null
+++ b/r_chainner/archs/face/stylegan2_clean_arch.py
@@ -0,0 +1,453 @@
+# pylint: skip-file
+# type: ignore
+import math
+import random
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.nn import init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+
+@torch.no_grad()
+def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
+ """Initialize network weights.
+ Args:
+ module_list (list[nn.Module] | nn.Module): Modules to be initialized.
+ scale (float): Scale initialized weights, especially for residual
+ blocks. Default: 1.
+ bias_fill (float): The value to fill bias. Default: 0
+ kwargs (dict): Other arguments for initialization function.
+ """
+ if not isinstance(module_list, list):
+ module_list = [module_list]
+ for module in module_list:
+ for m in module.modules():
+ if isinstance(m, nn.Conv2d):
+ init.kaiming_normal_(m.weight, **kwargs)
+ m.weight.data *= scale
+ if m.bias is not None:
+ m.bias.data.fill_(bias_fill)
+ elif isinstance(m, nn.Linear):
+ init.kaiming_normal_(m.weight, **kwargs)
+ m.weight.data *= scale
+ if m.bias is not None:
+ m.bias.data.fill_(bias_fill)
+ elif isinstance(m, _BatchNorm):
+ init.constant_(m.weight, 1)
+ if m.bias is not None:
+ m.bias.data.fill_(bias_fill)
+
+
+class NormStyleCode(nn.Module):
+ def forward(self, x):
+ """Normalize the style codes.
+ Args:
+ x (Tensor): Style codes with shape (b, c).
+ Returns:
+ Tensor: Normalized tensor.
+ """
+ return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)
+
+
+class ModulatedConv2d(nn.Module):
+ """Modulated Conv2d used in StyleGAN2.
+ There is no bias in ModulatedConv2d.
+ Args:
+ in_channels (int): Channel number of the input.
+ out_channels (int): Channel number of the output.
+ kernel_size (int): Size of the convolving kernel.
+ num_style_feat (int): Channel number of style features.
+ demodulate (bool): Whether to demodulate in the conv layer. Default: True.
+ sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
+ eps (float): A value added to the denominator for numerical stability. Default: 1e-8.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ num_style_feat,
+ demodulate=True,
+ sample_mode=None,
+ eps=1e-8,
+ ):
+ super(ModulatedConv2d, self).__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.kernel_size = kernel_size
+ self.demodulate = demodulate
+ self.sample_mode = sample_mode
+ self.eps = eps
+
+ # modulation inside each modulated conv
+ self.modulation = nn.Linear(num_style_feat, in_channels, bias=True)
+ # initialization
+ default_init_weights(
+ self.modulation,
+ scale=1,
+ bias_fill=1,
+ a=0,
+ mode="fan_in",
+ nonlinearity="linear",
+ )
+
+ self.weight = nn.Parameter(
+ torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
+ / math.sqrt(in_channels * kernel_size**2)
+ )
+ self.padding = kernel_size // 2
+
+ def forward(self, x, style):
+ """Forward function.
+ Args:
+ x (Tensor): Tensor with shape (b, c, h, w).
+ style (Tensor): Tensor with shape (b, num_style_feat).
+ Returns:
+ Tensor: Modulated tensor after convolution.
+ """
+ b, c, h, w = x.shape # c = c_in
+ # weight modulation
+ style = self.modulation(style).view(b, 1, c, 1, 1)
+ # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
+ weight = self.weight * style # (b, c_out, c_in, k, k)
+
+ if self.demodulate:
+ demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
+ weight = weight * demod.view(b, self.out_channels, 1, 1, 1)
+
+ weight = weight.view(
+ b * self.out_channels, c, self.kernel_size, self.kernel_size
+ )
+
+ # upsample or downsample if necessary
+ if self.sample_mode == "upsample":
+ x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
+ elif self.sample_mode == "downsample":
+ x = F.interpolate(x, scale_factor=0.5, mode="bilinear", align_corners=False)
+
+ b, c, h, w = x.shape
+ x = x.view(1, b * c, h, w)
+ # weight: (b*c_out, c_in, k, k), groups=b
+ out = F.conv2d(x, weight, padding=self.padding, groups=b)
+ out = out.view(b, self.out_channels, *out.shape[2:4])
+
+ return out
+
+ def __repr__(self):
+ return (
+ f"{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, "
+ f"kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})"
+ )
+
+
+class StyleConv(nn.Module):
+ """Style conv used in StyleGAN2.
+ Args:
+ in_channels (int): Channel number of the input.
+ out_channels (int): Channel number of the output.
+ kernel_size (int): Size of the convolving kernel.
+ num_style_feat (int): Channel number of style features.
+ demodulate (bool): Whether demodulate in the conv layer. Default: True.
+ sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ num_style_feat,
+ demodulate=True,
+ sample_mode=None,
+ ):
+ super(StyleConv, self).__init__()
+ self.modulated_conv = ModulatedConv2d(
+ in_channels,
+ out_channels,
+ kernel_size,
+ num_style_feat,
+ demodulate=demodulate,
+ sample_mode=sample_mode,
+ )
+ self.weight = nn.Parameter(torch.zeros(1)) # for noise injection
+ self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1))
+ self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+ def forward(self, x, style, noise=None):
+ # modulate
+ out = self.modulated_conv(x, style) * 2**0.5 # for conversion
+ # noise injection
+ if noise is None:
+ b, _, h, w = out.shape
+ noise = out.new_empty(b, 1, h, w).normal_()
+ out = out + self.weight * noise
+ # add bias
+ out = out + self.bias
+ # activation
+ out = self.activate(out)
+ return out
+
+
+class ToRGB(nn.Module):
+ """To RGB (image space) from features.
+ Args:
+ in_channels (int): Channel number of input.
+ num_style_feat (int): Channel number of style features.
+ upsample (bool): Whether to upsample. Default: True.
+ """
+
+ def __init__(self, in_channels, num_style_feat, upsample=True):
+ super(ToRGB, self).__init__()
+ self.upsample = upsample
+ self.modulated_conv = ModulatedConv2d(
+ in_channels,
+ 3,
+ kernel_size=1,
+ num_style_feat=num_style_feat,
+ demodulate=False,
+ sample_mode=None,
+ )
+ self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
+
+ def forward(self, x, style, skip=None):
+ """Forward function.
+ Args:
+ x (Tensor): Feature tensor with shape (b, c, h, w).
+ style (Tensor): Tensor with shape (b, num_style_feat).
+ skip (Tensor): Base/skip tensor. Default: None.
+ Returns:
+ Tensor: RGB images.
+ """
+ out = self.modulated_conv(x, style)
+ out = out + self.bias
+ if skip is not None:
+ if self.upsample:
+ skip = F.interpolate(
+ skip, scale_factor=2, mode="bilinear", align_corners=False
+ )
+ out = out + skip
+ return out
+
+
+class ConstantInput(nn.Module):
+ """Constant input.
+ Args:
+ num_channel (int): Channel number of constant input.
+ size (int): Spatial size of constant input.
+ """
+
+ def __init__(self, num_channel, size):
+ super(ConstantInput, self).__init__()
+ self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))
+
+ def forward(self, batch):
+ out = self.weight.repeat(batch, 1, 1, 1)
+ return out
+
+
+class StyleGAN2GeneratorClean(nn.Module):
+ """Clean version of StyleGAN2 Generator.
+ Args:
+ out_size (int): The spatial size of outputs.
+ num_style_feat (int): Channel number of style features. Default: 512.
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
+ narrow (float): Narrow ratio for channels. Default: 1.0.
+ """
+
+ def __init__(
+ self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1
+ ):
+ super(StyleGAN2GeneratorClean, self).__init__()
+ # Style MLP layers
+ self.num_style_feat = num_style_feat
+ style_mlp_layers = [NormStyleCode()]
+ for i in range(num_mlp):
+ style_mlp_layers.extend(
+ [
+ nn.Linear(num_style_feat, num_style_feat, bias=True),
+ nn.LeakyReLU(negative_slope=0.2, inplace=True),
+ ]
+ )
+ self.style_mlp = nn.Sequential(*style_mlp_layers)
+ # initialization
+ default_init_weights(
+ self.style_mlp,
+ scale=1,
+ bias_fill=0,
+ a=0.2,
+ mode="fan_in",
+ nonlinearity="leaky_relu",
+ )
+
+ # channel list
+ channels = {
+ "4": int(512 * narrow),
+ "8": int(512 * narrow),
+ "16": int(512 * narrow),
+ "32": int(512 * narrow),
+ "64": int(256 * channel_multiplier * narrow),
+ "128": int(128 * channel_multiplier * narrow),
+ "256": int(64 * channel_multiplier * narrow),
+ "512": int(32 * channel_multiplier * narrow),
+ "1024": int(16 * channel_multiplier * narrow),
+ }
+ self.channels = channels
+
+ self.constant_input = ConstantInput(channels["4"], size=4)
+ self.style_conv1 = StyleConv(
+ channels["4"],
+ channels["4"],
+ kernel_size=3,
+ num_style_feat=num_style_feat,
+ demodulate=True,
+ sample_mode=None,
+ )
+ self.to_rgb1 = ToRGB(channels["4"], num_style_feat, upsample=False)
+
+ self.log_size = int(math.log(out_size, 2))
+ self.num_layers = (self.log_size - 2) * 2 + 1
+ self.num_latent = self.log_size * 2 - 2
+
+ self.style_convs = nn.ModuleList()
+ self.to_rgbs = nn.ModuleList()
+ self.noises = nn.Module()
+
+ in_channels = channels["4"]
+ # noise
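+        # register one fixed noise buffer per style conv layer, at resolutions 4, 8, 8, 16, 16, ... up to out_size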
+ for layer_idx in range(self.num_layers):
+ resolution = 2 ** ((layer_idx + 5) // 2)
+ shape = [1, 1, resolution, resolution]
+ self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
+ # style convs and to_rgbs
+ for i in range(3, self.log_size + 1):
+ out_channels = channels[f"{2**i}"]
+ self.style_convs.append(
+ StyleConv(
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ num_style_feat=num_style_feat,
+ demodulate=True,
+ sample_mode="upsample",
+ )
+ )
+ self.style_convs.append(
+ StyleConv(
+ out_channels,
+ out_channels,
+ kernel_size=3,
+ num_style_feat=num_style_feat,
+ demodulate=True,
+ sample_mode=None,
+ )
+ )
+ self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True))
+ in_channels = out_channels
+
+ def make_noise(self):
+ """Make noise for noise injection."""
+ device = self.constant_input.weight.device
+ noises = [torch.randn(1, 1, 4, 4, device=device)]
+
+ for i in range(3, self.log_size + 1):
+ for _ in range(2):
+ noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))
+
+ return noises
+
+ def get_latent(self, x):
+ return self.style_mlp(x)
+
+ def mean_latent(self, num_latent):
+ latent_in = torch.randn(
+ num_latent, self.num_style_feat, device=self.constant_input.weight.device
+ )
+ latent = self.style_mlp(latent_in).mean(0, keepdim=True)
+ return latent
+
+ def forward(
+ self,
+ styles,
+ input_is_latent=False,
+ noise=None,
+ randomize_noise=True,
+ truncation=1,
+ truncation_latent=None,
+ inject_index=None,
+ return_latents=False,
+ ):
+ """Forward function for StyleGAN2GeneratorClean.
+ Args:
+ styles (list[Tensor]): Sample codes of styles.
+ input_is_latent (bool): Whether input is latent style. Default: False.
+ noise (Tensor | None): Input noise or None. Default: None.
+            randomize_noise (bool): Randomize noise, used when 'noise' is None. Default: True.
+ truncation (float): The truncation ratio. Default: 1.
+ truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
+ inject_index (int | None): The injection index for mixing noise. Default: None.
+ return_latents (bool): Whether to return style latents. Default: False.
+ """
+ # style codes -> latents with Style MLP layer
+ if not input_is_latent:
+ styles = [self.style_mlp(s) for s in styles]
+ # noises
+ if noise is None:
+ if randomize_noise:
+ noise = [None] * self.num_layers # for each style conv layer
+ else: # use the stored noise
+ noise = [
+ getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
+ ]
+ # style truncation
+ if truncation < 1:
+ style_truncation = []
+ for style in styles:
+ style_truncation.append(
+ truncation_latent + truncation * (style - truncation_latent)
+ )
+ styles = style_truncation
+ # get style latents with injection
+ if len(styles) == 1:
+ inject_index = self.num_latent
+
+ if styles[0].ndim < 3:
+ # repeat latent code for all the layers
+ latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+ else: # used for encoder with different latent code for each layer
+ latent = styles[0]
+ elif len(styles) == 2: # mixing noises
+ if inject_index is None:
+ inject_index = random.randint(1, self.num_latent - 1)
+ latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
+ latent2 = (
+ styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
+ )
+ latent = torch.cat([latent1, latent2], 1)
+
+ # main generation
+ out = self.constant_input(latent.shape[0])
+ out = self.style_conv1(out, latent[:, 0], noise=noise[0])
+ skip = self.to_rgb1(out, latent[:, 1])
+
+ i = 1
+ for conv1, conv2, noise1, noise2, to_rgb in zip(
+ self.style_convs[::2],
+ self.style_convs[1::2],
+ noise[1::2],
+ noise[2::2],
+ self.to_rgbs,
+ ):
+ out = conv1(out, latent[:, i], noise=noise1)
+ out = conv2(out, latent[:, i + 1], noise=noise2)
+ skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space
+ i += 2
+
+ image = skip
+
+ if return_latents:
+ return image, latent
+ else:
+ return image, None
diff --git a/r_chainner/model_loading.py b/r_chainner/model_loading.py
new file mode 100644
index 0000000..21fd51d
--- /dev/null
+++ b/r_chainner/model_loading.py
@@ -0,0 +1,28 @@
+from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean
+from r_chainner.types import PyTorchModel
+
+
+class UnsupportedModel(Exception):
+ pass
+
+
+def load_state_dict(state_dict) -> PyTorchModel:
+
+ state_dict_keys = list(state_dict.keys())
+
+ if "params_ema" in state_dict_keys:
+ state_dict = state_dict["params_ema"]
+ elif "params-ema" in state_dict_keys:
+ state_dict = state_dict["params-ema"]
+ elif "params" in state_dict_keys:
+ state_dict = state_dict["params"]
+
+ state_dict_keys = list(state_dict.keys())
+
+ # GFPGAN
+ if (
+ "toRGB.0.weight" in state_dict_keys
+ and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys
+ ):
+        return GFPGANv1Clean(state_dict)
+    raise UnsupportedModel
diff --git a/r_chainner/types.py b/r_chainner/types.py
new file mode 100644
index 0000000..73e6a28
--- /dev/null
+++ b/r_chainner/types.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean
+
+
+PyTorchFaceModels = (GFPGANv1Clean,)
+PyTorchFaceModel = Union[GFPGANv1Clean]
+
+
+def is_pytorch_face_model(model: object):
+ return isinstance(model, PyTorchFaceModels)
+
+PyTorchModels = (*PyTorchFaceModels, )
+PyTorchModel = Union[PyTorchFaceModel]
+
+
+def is_pytorch_model(model: object):
+ return isinstance(model, PyTorchModels)
diff --git a/r_facelib/__init__.py b/r_facelib/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/r_facelib/__init__.py
diff --git a/r_facelib/detection/__init__.py b/r_facelib/detection/__init__.py
new file mode 100644
index 0000000..3c953bd
--- /dev/null
+++ b/r_facelib/detection/__init__.py
@@ -0,0 +1,102 @@
+import os
+import torch
+from torch import nn
+from copy import deepcopy
+import pathlib
+
+from r_facelib.utils import load_file_from_url
+from r_facelib.utils import download_pretrained_models
+from r_facelib.detection.yolov5face.models.common import Conv
+
+from .retinaface.retinaface import RetinaFace
+from .yolov5face.face_detector import YoloDetector
+
+
+def init_detection_model(model_name, half=False, device='cuda'):
+ if 'retinaface' in model_name:
+ model = init_retinaface_model(model_name, half, device)
+ elif 'YOLOv5' in model_name:
+ model = init_yolov5face_model(model_name, device)
+ else:
+ raise NotImplementedError(f'{model_name} is not implemented.')
+
+ return model
+
+
+def init_retinaface_model(model_name, half=False, device='cuda'):
+ if model_name == 'retinaface_resnet50':
+ model = RetinaFace(network_name='resnet50', half=half)
+ model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth'
+ elif model_name == 'retinaface_mobile0.25':
+ model = RetinaFace(network_name='mobile0.25', half=half)
+ model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth'
+ else:
+ raise NotImplementedError(f'{model_name} is not implemented.')
+
+ model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
+ load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+ # remove unnecessary 'module.'
+ for k, v in deepcopy(load_net).items():
+ if k.startswith('module.'):
+ load_net[k[7:]] = v
+ load_net.pop(k)
+ model.load_state_dict(load_net, strict=True)
+ model.eval()
+ model = model.to(device)
+
+ return model
+
+
+def init_yolov5face_model(model_name, device='cuda'):
+ current_dir = str(pathlib.Path(__file__).parent.resolve())
+ if model_name == 'YOLOv5l':
+ model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5l.yaml', device=device)
+ model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5l-face.pth'
+ elif model_name == 'YOLOv5n':
+ model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5n.yaml', device=device)
+ model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5n-face.pth'
+ else:
+ raise NotImplementedError(f'{model_name} is not implemented.')
+
+ model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
+ load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+ model.detector.load_state_dict(load_net, strict=True)
+ model.detector.eval()
+ model.detector = model.detector.to(device).float()
+
+ for m in model.detector.modules():
+ if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
+ m.inplace = True # pytorch 1.7.0 compatibility
+ elif isinstance(m, Conv):
+ m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
+
+ return model
+
+
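+# Usage sketch (illustrative): the matching weights are downloaded on first call
+# via load_file_from_url into the models/facedetection directory.
+#
+# det_net = init_detection_model('retinaface_resnet50', half=False, device='cpu')
+# faces = det_net.detect_faces(bgr_image) # np.array [n, 15]: box, score, 5 landmarks
+
+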
+# Download from Google Drive
+# def init_yolov5face_model(model_name, device='cuda'):
+# if model_name == 'YOLOv5l':
+# model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5l.yaml', device=device)
+# f_id = {'yolov5l-face.pth': '131578zMA6B2x8VQHyHfa6GEPtulMCNzV'}
+# elif model_name == 'YOLOv5n':
+# model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5n.yaml', device=device)
+# f_id = {'yolov5n-face.pth': '1fhcpFvWZqghpGXjYPIne2sw1Fy4yhw6o'}
+# else:
+# raise NotImplementedError(f'{model_name} is not implemented.')
+
+# model_path = os.path.join('../../models/facedetection', list(f_id.keys())[0])
+# if not os.path.exists(model_path):
+# download_pretrained_models(file_ids=f_id, save_path_root='../../models/facedetection')
+
+# load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+# model.detector.load_state_dict(load_net, strict=True)
+# model.detector.eval()
+# model.detector = model.detector.to(device).float()
+
+# for m in model.detector.modules():
+# if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
+# m.inplace = True # pytorch 1.7.0 compatibility
+# elif isinstance(m, Conv):
+# m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
+
+# return model
\ No newline at end of file
diff --git a/r_facelib/detection/align_trans.py b/r_facelib/detection/align_trans.py
new file mode 100644
index 0000000..0b7374a
--- /dev/null
+++ b/r_facelib/detection/align_trans.py
@@ -0,0 +1,219 @@
+import cv2
+import numpy as np
+
+from .matlab_cp2tform import get_similarity_transform_for_cv2
+
+# reference facial points, a list of coordinates (x,y)
+REFERENCE_FACIAL_POINTS = [[30.29459953, 51.69630051], [65.53179932, 51.50139999], [48.02519989, 71.73660278],
+ [33.54930115, 92.3655014], [62.72990036, 92.20410156]]
+
+DEFAULT_CROP_SIZE = (96, 112)
+
+
+class FaceWarpException(Exception):
+
+ def __str__(self):
+ return 'In File {}:{}'.format(__file__, super().__str__())
+
+
+def get_reference_facial_points(output_size=None, inner_padding_factor=0.0, outer_padding=(0, 0), default_square=False):
+ """
+ Function:
+ ----------
+ get reference 5 key points according to crop settings:
+ 0. Set default crop_size:
+ if default_square:
+ crop_size = (112, 112)
+ else:
+ crop_size = (96, 112)
+ 1. Pad the crop_size by inner_padding_factor in each side;
+ 2. Resize crop_size into (output_size - outer_padding*2),
+ pad into output_size with outer_padding;
+ 3. Output reference_5point;
+ Parameters:
+ ----------
+ @output_size: (w, h) or None
+ size of aligned face image
+ @inner_padding_factor: (w_factor, h_factor)
+ padding factor for inner (w, h)
+ @outer_padding: (w_pad, h_pad)
+ padding added on each side of the output image
+ @default_square: True or False
+ if True:
+ default crop_size = (112, 112)
+ else:
+ default crop_size = (96, 112);
+ !!! make sure, if output_size is not None:
+ (output_size - outer_padding)
+ = some_scale * (default crop_size * (1.0 +
+ inner_padding_factor))
+ Returns:
+ ----------
+ @reference_5point: 5x2 np.array
+ each row is a pair of transformed coordinates (x, y)
+ """
+
+ tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
+ tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
+
+ # 0) make the inner region a square
+ if default_square:
+ size_diff = max(tmp_crop_size) - tmp_crop_size
+ tmp_5pts += size_diff / 2
+ tmp_crop_size += size_diff
+
+ if (output_size and output_size[0] == tmp_crop_size[0] and output_size[1] == tmp_crop_size[1]):
+
+ return tmp_5pts
+
+ if (inner_padding_factor == 0 and outer_padding == (0, 0)):
+ if output_size is None:
+ return tmp_5pts
+ else:
+ raise FaceWarpException('No paddings to do, output_size must be None or {}'.format(tmp_crop_size))
+
+ # check output size
+ if not (0 <= inner_padding_factor <= 1.0):
+ raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
+
+ if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) and output_size is None):
+ output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
+ output_size += np.array(outer_padding)
+ if not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1]):
+ raise FaceWarpException('Not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1])')
+
+ # 1) pad the inner region according inner_padding_factor
+ if inner_padding_factor > 0:
+ size_diff = tmp_crop_size * inner_padding_factor * 2
+ tmp_5pts += size_diff / 2
+ tmp_crop_size += np.round(size_diff).astype(np.int32)
+
+ # 2) resize the padded inner region
+ size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
+
+ if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
+ raise FaceWarpException('Must have (output_size - outer_padding)'
+ '= some_scale * (crop_size * (1.0 + inner_padding_factor)')
+
+ scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
+ tmp_5pts = tmp_5pts * scale_factor
+ # size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
+ # tmp_5pts = tmp_5pts + size_diff / 2
+ tmp_crop_size = size_bf_outer_pad
+
+ # 3) add outer_padding to make output_size
+ reference_5point = tmp_5pts + np.array(outer_padding)
+ tmp_crop_size = output_size
+
+ return reference_5point
+
+
+def get_affine_transform_matrix(src_pts, dst_pts):
+ """
+ Function:
+ ----------
+ get affine transform matrix 'tfm' from src_pts to dst_pts
+ Parameters:
+ ----------
+ @src_pts: Kx2 np.array
+ source points matrix, each row is a pair of coordinates (x, y)
+ @dst_pts: Kx2 np.array
+ destination points matrix, each row is a pair of coordinates (x, y)
+ Returns:
+ ----------
+ @tfm: 2x3 np.array
+ transform matrix from src_pts to dst_pts
+ """
+
+ tfm = np.float32([[1, 0, 0], [0, 1, 0]])
+ n_pts = src_pts.shape[0]
+ ones = np.ones((n_pts, 1), src_pts.dtype)
+ src_pts_ = np.hstack([src_pts, ones])
+ dst_pts_ = np.hstack([dst_pts, ones])
+
+ A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)
+
+ if rank == 3:
+ tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]], [A[0, 1], A[1, 1], A[2, 1]]])
+ elif rank == 2:
+ tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])
+
+ return tfm
+
+
+def warp_and_crop_face(src_img, facial_pts, reference_pts=None, crop_size=(96, 112), align_type='similarity'):
+ """
+ Function:
+ ----------
+ warp the input image so that facial_pts align with reference_pts, then crop the face
+ Parameters:
+ ----------
+ @src_img: HxWxC np.array
+ input image
+ @facial_pts: could be
+ 1)a list of K coordinates (x,y)
+ or
+ 2) Kx2 or 2xK np.array
+ each row or col is a pair of coordinates (x, y)
+ @reference_pts: could be
+ 1) a list of K coordinates (x,y)
+ or
+ 2) Kx2 or 2xK np.array
+ each row or col is a pair of coordinates (x, y)
+ or
+ 3) None
+ if None, use default reference facial points
+ @crop_size: (w, h)
+ output face image size
+ @align_type: transform type, could be one of
+ 1) 'similarity': use similarity transform
+ 2) 'cv2_affine': use the first 3 points to do affine transform,
+ by calling cv2.getAffineTransform()
+ 3) 'affine': use all points to do affine transform
+ Returns:
+ ----------
+ @face_img: output face image with size (w, h) = @crop_size
+ """
+
+ if reference_pts is None:
+ if crop_size[0] == 96 and crop_size[1] == 112:
+ reference_pts = REFERENCE_FACIAL_POINTS
+ else:
+ default_square = False
+ inner_padding_factor = 0
+ outer_padding = (0, 0)
+ output_size = crop_size
+
+ reference_pts = get_reference_facial_points(output_size, inner_padding_factor, outer_padding,
+ default_square)
+
+ ref_pts = np.float32(reference_pts)
+ ref_pts_shp = ref_pts.shape
+ if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
+ raise FaceWarpException('reference_pts.shape must be (K,2) or (2,K) and K>2')
+
+ if ref_pts_shp[0] == 2:
+ ref_pts = ref_pts.T
+
+ src_pts = np.float32(facial_pts)
+ src_pts_shp = src_pts.shape
+ if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
+ raise FaceWarpException('facial_pts.shape must be (K,2) or (2,K) and K>2')
+
+ if src_pts_shp[0] == 2:
+ src_pts = src_pts.T
+
+ if src_pts.shape != ref_pts.shape:
+ raise FaceWarpException('facial_pts and reference_pts must have the same shape')
+
+ if align_type == 'cv2_affine':
+ tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
+ elif align_type == 'affine':
+ tfm = get_affine_transform_matrix(src_pts, ref_pts)
+ else:
+ tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
+
+ face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
+
+ return face_img
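+
+
+# Minimal usage sketch (assumes `img` is an HxWx3 np.uint8 image and `facial5points`
+# is a 5x2 list of (x, y) landmarks produced by a face detector):
+#
+# ref_pts = get_reference_facial_points(output_size=(112, 112), default_square=True)
+# aligned = warp_and_crop_face(img, facial5points, reference_pts=ref_pts, crop_size=(112, 112))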
diff --git a/r_facelib/detection/matlab_cp2tform.py b/r_facelib/detection/matlab_cp2tform.py
new file mode 100644
index 0000000..b1014a8
--- /dev/null
+++ b/r_facelib/detection/matlab_cp2tform.py
@@ -0,0 +1,317 @@
+import numpy as np
+from numpy.linalg import inv, lstsq
+from numpy.linalg import matrix_rank as rank
+from numpy.linalg import norm
+
+
+class MatlabCp2tormException(Exception):
+
+ def __str__(self):
+ return 'In File {}:{}'.format(__file__, super().__str__())
+
+
+def tformfwd(trans, uv):
+ """
+ Function:
+ ----------
+ apply affine transform 'trans' to uv
+
+ Parameters:
+ ----------
+ @trans: 3x3 np.array
+ transform matrix
+ @uv: Kx2 np.array
+ each row is a pair of coordinates (x, y)
+
+ Returns:
+ ----------
+ @xy: Kx2 np.array
+ each row is a pair of transformed coordinates (x, y)
+ """
+ uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
+ xy = np.dot(uv, trans)
+ xy = xy[:, 0:-1]
+ return xy
+
+
+def tforminv(trans, uv):
+ """
+ Function:
+ ----------
+ apply the inverse of affine transform 'trans' to uv
+
+ Parameters:
+ ----------
+ @trans: 3x3 np.array
+ transform matrix
+ @uv: Kx2 np.array
+ each row is a pair of coordinates (x, y)
+
+ Returns:
+ ----------
+ @xy: Kx2 np.array
+ each row is a pair of inverse-transformed coordinates (x, y)
+ """
+ Tinv = inv(trans)
+ xy = tformfwd(Tinv, uv)
+ return xy
+
+
+def findNonreflectiveSimilarity(uv, xy, options=None):
+ if options is None:
+ options = {'K': 2}
+
+ K = options['K']
+ M = xy.shape[0]
+ x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
+ y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
+
+ tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
+ tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
+ X = np.vstack((tmp1, tmp2))
+
+ u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
+ v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
+ U = np.vstack((u, v))
+
+ # We know that X * r = U
+ if rank(X) >= 2 * K:
+ r, _, _, _ = lstsq(X, U, rcond=-1)
+ r = np.squeeze(r)
+ else:
+ raise Exception('cp2tform:twoUniquePointsReq')
+ sc = r[0]
+ ss = r[1]
+ tx = r[2]
+ ty = r[3]
+
+ Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
+ T = inv(Tinv)
+ T[:, 2] = np.array([0, 0, 1])
+
+ return T, Tinv
+
+
+def findSimilarity(uv, xy, options=None):
+ if options is None:
+ options = {'K': 2}
+
+ # uv = np.array(uv)
+ # xy = np.array(xy)
+
+ # Solve for trans1
+ trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
+
+ # Solve for trans2
+
+ # manually reflect the xy data across the Y-axis
+ xyR = xy.copy() # copy so that the caller's xy is not modified in place
+ xyR[:, 0] = -1 * xyR[:, 0]
+
+ trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
+
+ # manually reflect the tform to undo the reflection done on xyR
+ TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
+
+ trans2 = np.dot(trans2r, TreflectY)
+
+ # Figure out if trans1 or trans2 is better
+ xy1 = tformfwd(trans1, uv)
+ norm1 = norm(xy1 - xy)
+
+ xy2 = tformfwd(trans2, uv)
+ norm2 = norm(xy2 - xy)
+
+ if norm1 <= norm2:
+ return trans1, trans1_inv
+ else:
+ trans2_inv = inv(trans2)
+ return trans2, trans2_inv
+
+
+def get_similarity_transform(src_pts, dst_pts, reflective=True):
+ """
+ Function:
+ ----------
+ Find Similarity Transform Matrix 'trans':
+ u = src_pts[:, 0]
+ v = src_pts[:, 1]
+ x = dst_pts[:, 0]
+ y = dst_pts[:, 1]
+ [x, y, 1] = [u, v, 1] * trans
+
+ Parameters:
+ ----------
+ @src_pts: Kx2 np.array
+ source points, each row is a pair of coordinates (x, y)
+ @dst_pts: Kx2 np.array
+ destination points, each row is a pair of transformed
+ coordinates (x, y)
+ @reflective: True or False
+ if True:
+ use reflective similarity transform
+ else:
+ use non-reflective similarity transform
+
+ Returns:
+ ----------
+ @trans: 3x3 np.array
+ transform matrix from uv to xy
+ trans_inv: 3x3 np.array
+ inverse of trans, transform matrix from xy to uv
+ """
+
+ if reflective:
+ trans, trans_inv = findSimilarity(src_pts, dst_pts)
+ else:
+ trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
+
+ return trans, trans_inv
+
+
+def cvt_tform_mat_for_cv2(trans):
+ """
+ Function:
+ ----------
+ Convert Transform Matrix 'trans' into 'cv2_trans' which could be
+ directly used by cv2.warpAffine():
+ u = src_pts[:, 0]
+ v = src_pts[:, 1]
+ x = dst_pts[:, 0]
+ y = dst_pts[:, 1]
+ [x, y].T = cv_trans * [u, v, 1].T
+
+ Parameters:
+ ----------
+ @trans: 3x3 np.array
+ transform matrix from uv to xy
+
+ Returns:
+ ----------
+ @cv2_trans: 2x3 np.array
+ transform matrix from src_pts to dst_pts, could be directly used
+ for cv2.warpAffine()
+ """
+ cv2_trans = trans[:, 0:2].T
+
+ return cv2_trans
+
+
+def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
+ """
+ Function:
+ ----------
+ Find Similarity Transform Matrix 'cv2_trans' which could be
+ directly used by cv2.warpAffine():
+ u = src_pts[:, 0]
+ v = src_pts[:, 1]
+ x = dst_pts[:, 0]
+ y = dst_pts[:, 1]
+ [x, y].T = cv_trans * [u, v, 1].T
+
+ Parameters:
+ ----------
+ @src_pts: Kx2 np.array
+ source points, each row is a pair of coordinates (x, y)
+ @dst_pts: Kx2 np.array
+ destination points, each row is a pair of transformed
+ coordinates (x, y)
+ reflective: True or False
+ if True:
+ use reflective similarity transform
+ else:
+ use non-reflective similarity transform
+
+ Returns:
+ ----------
+ @cv2_trans: 2x3 np.array
+ transform matrix from src_pts to dst_pts, could be directly used
+ for cv2.warpAffine()
+ """
+ trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
+ cv2_trans = cvt_tform_mat_for_cv2(trans)
+
+ return cv2_trans
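+
+
+# Typical usage sketch (src_pts / dst_pts are Kx2 float arrays of corresponding points):
+#
+# tfm = get_similarity_transform_for_cv2(src_pts, dst_pts) # 2x3 matrix
+# aligned = cv2.warpAffine(img, tfm, (112, 112)) # requires `import cv2`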
+
+
+if __name__ == '__main__':
+ """
+ u = [0, 6, -2]
+ v = [0, 3, 5]
+ x = [-1, 0, 4]
+ y = [-1, -10, 4]
+
+ # In Matlab, run:
+ #
+ # uv = [u'; v'];
+ # xy = [x'; y'];
+ # tform_sim=cp2tform(uv,xy,'similarity');
+ #
+ # trans = tform_sim.tdata.T
+ # ans =
+ # -0.0764 -1.6190 0
+ # 1.6190 -0.0764 0
+ # -3.2156 0.0290 1.0000
+ # trans_inv = tform_sim.tdata.Tinv
+ # ans =
+ #
+ # -0.0291 0.6163 0
+ # -0.6163 -0.0291 0
+ # -0.0756 1.9826 1.0000
+ # xy_m=tformfwd(tform_sim, u,v)
+ #
+ # xy_m =
+ #
+ # -3.2156 0.0290
+ # 1.1833 -9.9143
+ # 5.0323 2.8853
+ # uv_m=tforminv(tform_sim, x,y)
+ #
+ # uv_m =
+ #
+ # 0.5698 1.3953
+ # 6.0872 2.2733
+ # -2.6570 4.3314
+ """
+ u = [0, 6, -2]
+ v = [0, 3, 5]
+ x = [-1, 0, 4]
+ y = [-1, -10, 4]
+
+ uv = np.array((u, v)).T
+ xy = np.array((x, y)).T
+
+ print('\n--->uv:')
+ print(uv)
+ print('\n--->xy:')
+ print(xy)
+
+ trans, trans_inv = get_similarity_transform(uv, xy)
+
+ print('\n--->trans matrix:')
+ print(trans)
+
+ print('\n--->trans_inv matrix:')
+ print(trans_inv)
+
+ print('\n---> apply transform to uv')
+ print('\nxy_m = uv_augmented * trans')
+ uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
+ xy_m = np.dot(uv_aug, trans)
+ print(xy_m)
+
+ print('\nxy_m = tformfwd(trans, uv)')
+ xy_m = tformfwd(trans, uv)
+ print(xy_m)
+
+ print('\n---> apply inverse transform to xy')
+ print('\nuv_m = xy_augmented * trans_inv')
+ xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
+ uv_m = np.dot(xy_aug, trans_inv)
+ print(uv_m)
+
+ print('\nuv_m = tformfwd(trans_inv, xy)')
+ uv_m = tformfwd(trans_inv, xy)
+ print(uv_m)
+
+ uv_m = tforminv(trans, xy)
+ print('\nuv_m = tforminv(trans, xy)')
+ print(uv_m)
diff --git a/r_facelib/detection/retinaface/retinaface.py b/r_facelib/detection/retinaface/retinaface.py
new file mode 100644
index 0000000..5d9770a
--- /dev/null
+++ b/r_facelib/detection/retinaface/retinaface.py
@@ -0,0 +1,389 @@
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from PIL import Image
+from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter
+
+from modules import shared
+
+from r_facelib.detection.align_trans import get_reference_facial_points, warp_and_crop_face
+from r_facelib.detection.retinaface.retinaface_net import FPN, SSH, MobileNetV1, make_bbox_head, make_class_head, make_landmark_head
+from r_facelib.detection.retinaface.retinaface_utils import (PriorBox, batched_decode, batched_decode_landm, decode, decode_landm,
+ py_cpu_nms)
+
+#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+if torch.cuda.is_available():
+ device = torch.device('cuda')
+elif torch.backends.mps.is_available():
+ device = torch.device('mps')
+# elif hasattr(torch,'dml'):
+# device = torch.device('dml')
+elif hasattr(torch,'dml') or hasattr(torch,'privateuseone'): # AMD
+ if shared.cmd_opts is not None: # A1111
+ if shared.cmd_opts.device_id is not None:
+ device = torch.device(f'privateuseone:{shared.cmd_opts.device_id}')
+ else:
+ device = torch.device('privateuseone:0')
+ else:
+ device = torch.device('privateuseone:0')
+else:
+ device = torch.device('cpu')
+
+
+def generate_config(network_name):
+
+ cfg_mnet = {
+ 'name': 'mobilenet0.25',
+ 'min_sizes': [[16, 32], [64, 128], [256, 512]],
+ 'steps': [8, 16, 32],
+ 'variance': [0.1, 0.2],
+ 'clip': False,
+ 'loc_weight': 2.0,
+ 'gpu_train': True,
+ 'batch_size': 32,
+ 'ngpu': 1,
+ 'epoch': 250,
+ 'decay1': 190,
+ 'decay2': 220,
+ 'image_size': 640,
+ 'return_layers': {
+ 'stage1': 1,
+ 'stage2': 2,
+ 'stage3': 3
+ },
+ 'in_channel': 32,
+ 'out_channel': 64
+ }
+
+ cfg_re50 = {
+ 'name': 'Resnet50',
+ 'min_sizes': [[16, 32], [64, 128], [256, 512]],
+ 'steps': [8, 16, 32],
+ 'variance': [0.1, 0.2],
+ 'clip': False,
+ 'loc_weight': 2.0,
+ 'gpu_train': True,
+ 'batch_size': 24,
+ 'ngpu': 4,
+ 'epoch': 100,
+ 'decay1': 70,
+ 'decay2': 90,
+ 'image_size': 840,
+ 'return_layers': {
+ 'layer2': 1,
+ 'layer3': 2,
+ 'layer4': 3
+ },
+ 'in_channel': 256,
+ 'out_channel': 256
+ }
+
+ if network_name == 'mobile0.25':
+ return cfg_mnet
+ elif network_name == 'resnet50':
+ return cfg_re50
+ else:
+ raise NotImplementedError(f'network_name={network_name}')
+
+
+class RetinaFace(nn.Module):
+
+ def __init__(self, network_name='resnet50', half=False, phase='test'):
+ super(RetinaFace, self).__init__()
+ self.half_inference = half
+ cfg = generate_config(network_name)
+ self.backbone = cfg['name']
+
+ self.model_name = f'retinaface_{network_name}'
+ self.cfg = cfg
+ self.phase = phase
+ self.target_size, self.max_size = 1600, 2150
+ self.resize, self.scale, self.scale1 = 1., None, None
+ self.mean_tensor = torch.tensor([[[[104.]], [[117.]], [[123.]]]]).to(device)
+ self.reference = get_reference_facial_points(default_square=True)
+ # Build network.
+ backbone = None
+ if cfg['name'] == 'mobilenet0.25':
+ backbone = MobileNetV1()
+ self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
+ elif cfg['name'] == 'Resnet50':
+ import torchvision.models as models
+ backbone = models.resnet50(pretrained=False)
+ self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
+
+ in_channels_stage2 = cfg['in_channel']
+ in_channels_list = [
+ in_channels_stage2 * 2,
+ in_channels_stage2 * 4,
+ in_channels_stage2 * 8,
+ ]
+
+ out_channels = cfg['out_channel']
+ self.fpn = FPN(in_channels_list, out_channels)
+ self.ssh1 = SSH(out_channels, out_channels)
+ self.ssh2 = SSH(out_channels, out_channels)
+ self.ssh3 = SSH(out_channels, out_channels)
+
+ self.ClassHead = make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
+ self.BboxHead = make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
+ self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])
+
+ self.to(device)
+ self.eval()
+ if self.half_inference:
+ self.half()
+
+ def forward(self, inputs):
+ self.to(device)
+ out = self.body(inputs)
+
+ if self.backbone == 'mobilenet0.25' or self.backbone == 'Resnet50':
+ out = list(out.values())
+ # FPN
+ fpn = self.fpn(out)
+
+ # SSH
+ feature1 = self.ssh1(fpn[0])
+ feature2 = self.ssh2(fpn[1])
+ feature3 = self.ssh3(fpn[2])
+ features = [feature1, feature2, feature3]
+
+ bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
+ classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
+ tmp = [self.LandmarkHead[i](feature) for i, feature in enumerate(features)]
+ ldm_regressions = (torch.cat(tmp, dim=1))
+
+ if self.phase == 'train':
+ output = (bbox_regressions, classifications, ldm_regressions)
+ else:
+ output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
+ return output
+
+ def __detect_faces(self, inputs):
+ # get scale
+ height, width = inputs.shape[2:]
+ self.scale = torch.tensor([width, height, width, height], dtype=torch.float32).to(device)
+ tmp = [width, height, width, height, width, height, width, height, width, height]
+ self.scale1 = torch.tensor(tmp, dtype=torch.float32).to(device)
+
+ # forward pass
+ inputs = inputs.to(device)
+ if self.half_inference:
+ inputs = inputs.half()
+ loc, conf, landmarks = self(inputs)
+
+ # get priorbox
+ priorbox = PriorBox(self.cfg, image_size=inputs.shape[2:])
+ priors = priorbox.forward().to(device)
+
+ return loc, conf, landmarks, priors
+
+ # single image detection
+ def transform(self, image, use_origin_size):
+ # convert to opencv format
+ if isinstance(image, Image.Image):
+ image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
+ image = image.astype(np.float32)
+
+ # testing scale
+ im_size_min = np.min(image.shape[0:2])
+ im_size_max = np.max(image.shape[0:2])
+ resize = float(self.target_size) / float(im_size_min)
+
+ # prevent bigger axis from being more than max_size
+ if np.round(resize * im_size_max) > self.max_size:
+ resize = float(self.max_size) / float(im_size_max)
+ resize = 1 if use_origin_size else resize
+
+ # resize
+ if resize != 1:
+ image = cv2.resize(image, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
+
+ # convert to torch.tensor format
+ # image -= (104, 117, 123)
+ image = image.transpose(2, 0, 1)
+ image = torch.from_numpy(image).unsqueeze(0)
+
+ return image, resize
+
+ def detect_faces(
+ self,
+ image,
+ conf_threshold=0.8,
+ nms_threshold=0.4,
+ use_origin_size=True,
+ ):
+ """
+ Params:
+ image: input image in BGR order (np.array or PIL.Image)
+ """
+ image, self.resize = self.transform(image, use_origin_size)
+ image = image.to(device)
+ if self.half_inference:
+ image = image.half()
+ image = image - self.mean_tensor
+
+ loc, conf, landmarks, priors = self.__detect_faces(image)
+
+ boxes = decode(loc.data.squeeze(0), priors.data, self.cfg['variance'])
+ boxes = boxes * self.scale / self.resize
+ boxes = boxes.cpu().numpy()
+
+ scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
+
+ landmarks = decode_landm(landmarks.squeeze(0), priors, self.cfg['variance'])
+ landmarks = landmarks * self.scale1 / self.resize
+ landmarks = landmarks.cpu().numpy()
+
+ # ignore low scores
+ inds = np.where(scores > conf_threshold)[0]
+ boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]
+
+ # sort
+ order = scores.argsort()[::-1]
+ boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
+
+ # do NMS
+ bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+ keep = py_cpu_nms(bounding_boxes, nms_threshold)
+ bounding_boxes, landmarks = bounding_boxes[keep, :], landmarks[keep]
+ # self.t['forward_pass'].toc()
+ # print(self.t['forward_pass'].average_time)
+ # import sys
+ # sys.stdout.flush()
+ return np.concatenate((bounding_boxes, landmarks), axis=1)
+
+ def __align_multi(self, image, boxes, landmarks, limit=None):
+
+ if len(boxes) < 1:
+ return [], []
+
+ if limit:
+ boxes = boxes[:limit]
+ landmarks = landmarks[:limit]
+
+ faces = []
+ for landmark in landmarks:
+ facial5points = [[landmark[2 * j], landmark[2 * j + 1]] for j in range(5)]
+
+ warped_face = warp_and_crop_face(np.array(image), facial5points, self.reference, crop_size=(112, 112))
+ faces.append(warped_face)
+
+ return np.concatenate((boxes, landmarks), axis=1), faces
+
+ def align_multi(self, img, conf_threshold=0.8, limit=None):
+
+ rlt = self.detect_faces(img, conf_threshold=conf_threshold)
+ boxes, landmarks = rlt[:, 0:5], rlt[:, 5:]
+
+ return self.__align_multi(img, boxes, landmarks, limit)
+
+ # batched detection
+ def batched_transform(self, frames, use_origin_size):
+ """
+ Arguments:
+ frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],
+ type=np.float32, BGR format).
+ use_origin_size: whether to use origin size.
+ """
+ from_PIL = isinstance(frames[0], Image.Image)
+
+ # convert to opencv format
+ if from_PIL:
+ frames = [cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) for frame in frames]
+ frames = np.asarray(frames, dtype=np.float32)
+
+ # testing scale
+ im_size_min = np.min(frames[0].shape[0:2])
+ im_size_max = np.max(frames[0].shape[0:2])
+ resize = float(self.target_size) / float(im_size_min)
+
+ # prevent bigger axis from being more than max_size
+ if np.round(resize * im_size_max) > self.max_size:
+ resize = float(self.max_size) / float(im_size_max)
+ resize = 1 if use_origin_size else resize
+
+ # resize
+ if resize != 1:
+ if not from_PIL:
+ frames = F.interpolate(frames, scale_factor=resize)
+ else:
+ frames = [
+ cv2.resize(frame, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
+ for frame in frames
+ ]
+
+ # convert to torch.tensor format
+ if not from_PIL:
+ frames = frames.transpose(1, 2).transpose(1, 3).contiguous()
+ else:
+ frames = frames.transpose((0, 3, 1, 2))
+ frames = torch.from_numpy(frames)
+
+ return frames, resize
+
+ def batched_detect_faces(self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True):
+ """
+ Arguments:
+ frames: a list of PIL.Image, or np.array(shape=[n, h, w, c],
+ type=np.uint8, BGR format).
+ conf_threshold: confidence threshold.
+ nms_threshold: nms threshold.
+ use_origin_size: whether to use origin size.
+ Returns:
+ final_bounding_boxes: list of np.array ([n_boxes, 5],
+ type=np.float32).
+ final_landmarks: list of np.array ([n_boxes, 10], type=np.float32).
+ """
+ # self.t['forward_pass'].tic()
+ frames, self.resize = self.batched_transform(frames, use_origin_size)
+ frames = frames.to(device)
+ frames = frames - self.mean_tensor
+
+ b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)
+
+ final_bounding_boxes, final_landmarks = [], []
+
+ # decode
+ priors = priors.unsqueeze(0)
+ b_loc = batched_decode(b_loc, priors, self.cfg['variance']) * self.scale / self.resize
+ b_landmarks = batched_decode_landm(b_landmarks, priors, self.cfg['variance']) * self.scale1 / self.resize
+ b_conf = b_conf[:, :, 1]
+
+ # index for selection
+ b_indice = b_conf > conf_threshold
+
+ # concat
+ b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()
+
+ for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):
+
+ # ignore low scores
+ pred, landm = pred[inds, :], landm[inds, :]
+ if pred.shape[0] == 0:
+ final_bounding_boxes.append(np.array([], dtype=np.float32))
+ final_landmarks.append(np.array([], dtype=np.float32))
+ continue
+
+ # sort
+ # order = score.argsort(descending=True)
+ # box, landm, score = box[order], landm[order], score[order]
+
+ # to CPU
+ bounding_boxes, landm = pred.cpu().numpy(), landm.cpu().numpy()
+
+ # NMS
+ keep = py_cpu_nms(bounding_boxes, nms_threshold)
+ bounding_boxes, landmarks = bounding_boxes[keep, :], landm[keep]
+
+ # append
+ final_bounding_boxes.append(bounding_boxes)
+ final_landmarks.append(landmarks)
+ # self.t['forward_pass'].toc(average=True)
+ # self.batch_time += self.t['forward_pass'].diff
+ # self.total_frame += len(frames)
+ # print(self.batch_time / self.total_frame)
+
+ return final_bounding_boxes, final_landmarks
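+
+
+# Illustrative sketch (in this repo the weights are loaded by
+# r_facelib.detection.init_retinaface_model; a bare RetinaFace() has random weights):
+#
+# net = RetinaFace(network_name='resnet50', half=False)
+# dets = net.detect_faces(bgr_image) # np.array [n, 15]: x1, y1, x2, y2, score, 5 landmarks
+# boxes, landmarks = dets[:, 0:5], dets[:, 5:]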
diff --git a/r_facelib/detection/retinaface/retinaface_net.py b/r_facelib/detection/retinaface/retinaface_net.py
new file mode 100644
index 0000000..c52535e
--- /dev/null
+++ b/r_facelib/detection/retinaface/retinaface_net.py
@@ -0,0 +1,196 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def conv_bn(inp, oup, stride=1, leaky=0):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup),
+ nn.LeakyReLU(negative_slope=leaky, inplace=True))
+
+
+def conv_bn_no_relu(inp, oup, stride):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+ nn.BatchNorm2d(oup),
+ )
+
+
+def conv_bn1X1(inp, oup, stride, leaky=0):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), nn.BatchNorm2d(oup),
+ nn.LeakyReLU(negative_slope=leaky, inplace=True))
+
+
+def conv_dw(inp, oup, stride, leaky=0.1):
+ return nn.Sequential(
+ nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+ nn.BatchNorm2d(inp),
+ nn.LeakyReLU(negative_slope=leaky, inplace=True),
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+ nn.BatchNorm2d(oup),
+ nn.LeakyReLU(negative_slope=leaky, inplace=True),
+ )
+
+
+class SSH(nn.Module):
+
+ def __init__(self, in_channel, out_channel):
+ super(SSH, self).__init__()
+ assert out_channel % 4 == 0
+ leaky = 0
+ if (out_channel <= 64):
+ leaky = 0.1
+ self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)
+
+ self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
+ self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
+
+ self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
+ self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
+
+ def forward(self, input):
+ conv3X3 = self.conv3X3(input)
+
+ conv5X5_1 = self.conv5X5_1(input)
+ conv5X5 = self.conv5X5_2(conv5X5_1)
+
+ conv7X7_2 = self.conv7X7_2(conv5X5_1)
+ conv7X7 = self.conv7x7_3(conv7X7_2)
+
+ out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
+ out = F.relu(out)
+ return out
+
+
+class FPN(nn.Module):
+
+ def __init__(self, in_channels_list, out_channels):
+ super(FPN, self).__init__()
+ leaky = 0
+ if (out_channels <= 64):
+ leaky = 0.1
+ self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
+ self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
+ self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)
+
+ self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
+ self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)
+
+ def forward(self, input):
+ # names = list(input.keys())
+ # input = list(input.values())
+
+ output1 = self.output1(input[0])
+ output2 = self.output2(input[1])
+ output3 = self.output3(input[2])
+
+ up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode='nearest')
+ output2 = output2 + up3
+ output2 = self.merge2(output2)
+
+ up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode='nearest')
+ output1 = output1 + up2
+ output1 = self.merge1(output1)
+
+ out = [output1, output2, output3]
+ return out
+
+
+class MobileNetV1(nn.Module):
+
+ def __init__(self):
+ super(MobileNetV1, self).__init__()
+ self.stage1 = nn.Sequential(
+ conv_bn(3, 8, 2, leaky=0.1), # 3
+ conv_dw(8, 16, 1), # 7
+ conv_dw(16, 32, 2), # 11
+ conv_dw(32, 32, 1), # 19
+ conv_dw(32, 64, 2), # 27
+ conv_dw(64, 64, 1), # 43
+ )
+ self.stage2 = nn.Sequential(
+ conv_dw(64, 128, 2), # 43 + 16 = 59
+ conv_dw(128, 128, 1), # 59 + 32 = 91
+ conv_dw(128, 128, 1), # 91 + 32 = 123
+ conv_dw(128, 128, 1), # 123 + 32 = 155
+ conv_dw(128, 128, 1), # 155 + 32 = 187
+ conv_dw(128, 128, 1), # 187 + 32 = 219
+ )
+ self.stage3 = nn.Sequential(
+ conv_dw(128, 256, 2), # 219 +3 2 = 241
+ conv_dw(256, 256, 1), # 241 + 64 = 301
+ )
+ self.avg = nn.AdaptiveAvgPool2d((1, 1))
+ self.fc = nn.Linear(256, 1000)
+
+ def forward(self, x):
+ x = self.stage1(x)
+ x = self.stage2(x)
+ x = self.stage3(x)
+ x = self.avg(x)
+ # x = self.model(x)
+ x = x.view(-1, 256)
+ x = self.fc(x)
+ return x
+
+
+class ClassHead(nn.Module):
+
+ def __init__(self, inchannels=512, num_anchors=3):
+ super(ClassHead, self).__init__()
+ self.num_anchors = num_anchors
+ self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)
+
+ def forward(self, x):
+ out = self.conv1x1(x)
+ out = out.permute(0, 2, 3, 1).contiguous()
+
+ return out.view(out.shape[0], -1, 2)
+
+
+class BboxHead(nn.Module):
+
+ def __init__(self, inchannels=512, num_anchors=3):
+ super(BboxHead, self).__init__()
+ self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)
+
+ def forward(self, x):
+ out = self.conv1x1(x)
+ out = out.permute(0, 2, 3, 1).contiguous()
+
+ return out.view(out.shape[0], -1, 4)
+
+
+class LandmarkHead(nn.Module):
+
+ def __init__(self, inchannels=512, num_anchors=3):
+ super(LandmarkHead, self).__init__()
+ self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)
+
+ def forward(self, x):
+ out = self.conv1x1(x)
+ out = out.permute(0, 2, 3, 1).contiguous()
+
+ return out.view(out.shape[0], -1, 10)
+
+
+def make_class_head(fpn_num=3, inchannels=64, anchor_num=2):
+ classhead = nn.ModuleList()
+ for i in range(fpn_num):
+ classhead.append(ClassHead(inchannels, anchor_num))
+ return classhead
+
+
+def make_bbox_head(fpn_num=3, inchannels=64, anchor_num=2):
+ bboxhead = nn.ModuleList()
+ for i in range(fpn_num):
+ bboxhead.append(BboxHead(inchannels, anchor_num))
+ return bboxhead
+
+
+def make_landmark_head(fpn_num=3, inchannels=64, anchor_num=2):
+ landmarkhead = nn.ModuleList()
+ for i in range(fpn_num):
+ landmarkhead.append(LandmarkHead(inchannels, anchor_num))
+ return landmarkhead
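+
+
+# How these blocks are wired in RetinaFace (sketch only, not executed here): the FPN
+# maps the three backbone feature maps to a common channel width, each level passes
+# through an SSH context module, and the per-level heads predict class/box/landmarks.
+#
+# fpn = FPN([64, 128, 256], 64)
+# ssh = SSH(64, 64)
+# class_heads = make_class_head(fpn_num=3, inchannels=64, anchor_num=2)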
diff --git a/r_facelib/detection/retinaface/retinaface_utils.py b/r_facelib/detection/retinaface/retinaface_utils.py
new file mode 100644
index 0000000..f19e320
--- /dev/null
+++ b/r_facelib/detection/retinaface/retinaface_utils.py
@@ -0,0 +1,421 @@
+import numpy as np
+import torch
+import torchvision
+from itertools import product as product
+from math import ceil
+
+
+class PriorBox(object):
+
+ def __init__(self, cfg, image_size=None, phase='train'):
+ super(PriorBox, self).__init__()
+ self.min_sizes = cfg['min_sizes']
+ self.steps = cfg['steps']
+ self.clip = cfg['clip']
+ self.image_size = image_size
+ self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
+ self.name = 's'
+
+ def forward(self):
+ anchors = []
+ for k, f in enumerate(self.feature_maps):
+ min_sizes = self.min_sizes[k]
+ for i, j in product(range(f[0]), range(f[1])):
+ for min_size in min_sizes:
+ s_kx = min_size / self.image_size[1]
+ s_ky = min_size / self.image_size[0]
+ dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
+ dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
+ for cy, cx in product(dense_cy, dense_cx):
+ anchors += [cx, cy, s_kx, s_ky]
+
+ # back to torch land
+ output = torch.Tensor(anchors).view(-1, 4)
+ if self.clip:
+ output.clamp_(max=1, min=0)
+ return output
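+
+
+# Quick check (illustrative), using cfg_mnet from retinaface.py: for a 640x640 input,
+# steps (8, 16, 32) give 80x80 + 40x40 + 20x20 cells with two anchor sizes each, so
+# PriorBox(cfg_mnet, image_size=(640, 640)).forward() returns a [16800, 4] tensor of priors.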
+
+
+def py_cpu_nms(dets, thresh):
+ """NMS on [n, 5] (x1, y1, x2, y2, score) detections via torchvision.ops.nms."""
+ keep = torchvision.ops.nms(
+ boxes=torch.Tensor(dets[:, :4]),
+ scores=torch.Tensor(dets[:, 4]),
+ iou_threshold=thresh,
+ )
+
+ return list(keep)
+
+
+def point_form(boxes):
+ """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
+ representation for comparison to point form ground truth data.
+ Args:
+ boxes: (tensor) center-size default boxes from priorbox layers.
+ Return:
+ boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
+ """
+ return torch.cat(
+ (
+ boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin
+ boxes[:, :2] + boxes[:, 2:] / 2),
+ 1) # xmax, ymax
+
+
+def center_size(boxes):
+ """ Convert prior_boxes to (cx, cy, w, h)
+ representation for comparison to center-size form ground truth data.
+ Args:
+ boxes: (tensor) point_form boxes
+ Return:
+ boxes: (tensor) Converted cx, cy, w, h form of boxes.
+ """
+ return torch.cat(
+ ((boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy
+ boxes[:, 2:] - boxes[:, :2]), # w, h
+ 1)
+
+
+def intersect(box_a, box_b):
+ """ We resize both tensors to [A,B,2] without new malloc:
+ [A,2] -> [A,1,2] -> [A,B,2]
+ [B,2] -> [1,B,2] -> [A,B,2]
+ Then we compute the area of intersect between box_a and box_b.
+ Args:
+ box_a: (tensor) bounding boxes, Shape: [A,4].
+ box_b: (tensor) bounding boxes, Shape: [B,4].
+ Return:
+ (tensor) intersection area, Shape: [A,B].
+ """
+ A = box_a.size(0)
+ B = box_b.size(0)
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
+ inter = torch.clamp((max_xy - min_xy), min=0)
+ return inter[:, :, 0] * inter[:, :, 1]
+
+
+def jaccard(box_a, box_b):
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
+ is simply the intersection over union of two boxes. Here we operate on
+ ground truth boxes and default boxes.
+ E.g.:
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
+ Args:
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
+ Return:
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
+ """
+ inter = intersect(box_a, box_b)
+ area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
+ area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
+ union = area_a + area_b - inter
+ return inter / union # [A,B]
+
+
+def matrix_iou(a, b):
+ """
+ return iou of a and b, numpy version for data augmentation
+ """
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+ return area_i / (area_a[:, np.newaxis] + area_b - area_i)
+
+
+def matrix_iof(a, b):
+ """
+ return iof of a and b, numpy version for data augmentation
+ """
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ return area_i / np.maximum(area_a[:, np.newaxis], 1)
+
+
+def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
+ """Match each prior box with the ground truth box of the highest jaccard
+ overlap, encode the bounding boxes, then return the matched indices
+ corresponding to both confidence and location preds.
+ Args:
+ threshold: (float) The overlap threshold used when matching boxes.
+ truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
+ priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
+ variances: (tensor) Variances corresponding to each prior coord,
+ Shape: [num_priors, 4].
+ labels: (tensor) All the class labels for the image, Shape: [num_obj].
+ landms: (tensor) Ground truth landms, Shape [num_obj, 10].
+ loc_t: (tensor) Tensor to be filled w/ encoded location targets.
+ conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
+ landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
+ idx: (int) current batch index
+ Return:
+ The matched indices corresponding to 1)location 2)confidence
+ 3)landm preds.
+ """
+ # jaccard index
+ overlaps = jaccard(truths, point_form(priors))
+ # (Bipartite Matching)
+ # [1,num_objects] best prior for each ground truth
+ best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
+
+ # ignore hard gt
+ valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
+ best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
+ if best_prior_idx_filter.shape[0] <= 0:
+ loc_t[idx] = 0
+ conf_t[idx] = 0
+ return
+
+ # [1,num_priors] best ground truth for each prior
+ best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
+ best_truth_idx.squeeze_(0)
+ best_truth_overlap.squeeze_(0)
+ best_prior_idx.squeeze_(1)
+ best_prior_idx_filter.squeeze_(1)
+ best_prior_overlap.squeeze_(1)
+ best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
+ # TODO refactor: index best_prior_idx with long tensor
+ # ensure every gt matches with its prior of max overlap
+ for j in range(best_prior_idx.size(0)): # ensure each gt keeps its best-matching prior
+ best_truth_idx[best_prior_idx[j]] = j
+ matches = truths[best_truth_idx] # Shape: [num_priors,4] ground-truth box assigned to each prior
+ conf = labels[best_truth_idx] # Shape: [num_priors] label assigned to each prior
+ conf[best_truth_overlap < threshold] = 0 # priors with overlap below threshold are labelled as background (negatives)
+ loc = encode(matches, priors, variances)
+
+ matches_landm = landms[best_truth_idx]
+ landm = encode_landm(matches_landm, priors, variances)
+ loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
+ conf_t[idx] = conf # [num_priors] top class label for each prior
+ landm_t[idx] = landm
+
+
+def encode(matched, priors, variances):
+ """Encode the variances from the priorbox layers into the ground truth boxes
+ we have matched (based on jaccard overlap) with the prior boxes.
+ Args:
+ matched: (tensor) Coords of ground truth for each prior in point-form
+ Shape: [num_priors, 4].
+ priors: (tensor) Prior boxes in center-offset form
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ encoded boxes (tensor), Shape: [num_priors, 4]
+ """
+
+ # dist b/t match center and prior's center
+ g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
+ # encode variance
+ g_cxcy /= (variances[0] * priors[:, 2:])
+ # match wh / prior wh
+ g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
+ g_wh = torch.log(g_wh) / variances[1]
+ # return target for smooth_l1_loss
+ return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
+
+
+def encode_landm(matched, priors, variances):
+ """Encode the variances from the priorbox layers into the ground truth boxes
+ we have matched (based on jaccard overlap) with the prior boxes.
+ Args:
+ matched: (tensor) Coords of ground truth for each prior in point-form
+ Shape: [num_priors, 10].
+ priors: (tensor) Prior boxes in center-offset form
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ encoded landm (tensor), Shape: [num_priors, 10]
+ """
+
+ # dist b/t match center and prior's center
+ matched = torch.reshape(matched, (matched.size(0), 5, 2))
+ priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
+ g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+ # encode variance
+ g_cxcy /= (variances[0] * priors[:, :, 2:])
+ # g_cxcy /= priors[:, :, 2:]
+ g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
+ # return target for smooth_l1_loss
+ return g_cxcy
+
+
+# Adapted from https://github.com/Hakuyume/chainer-ssd
+def decode(loc, priors, variances):
+ """Decode locations from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ loc (tensor): location predictions for loc layers,
+ Shape: [num_priors,4]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded bounding box predictions
+ """
+
+ boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+ priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
+ boxes[:, :2] -= boxes[:, 2:] / 2
+ boxes[:, 2:] += boxes[:, :2]
+ return boxes
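+
+
+# Note (illustrative): encode() and decode() are inverses for a fixed prior set,
+# up to floating-point error:
+#
+# loc = encode(gt_boxes, priors, variances=[0.1, 0.2]) # gt_boxes in point form
+# assert torch.allclose(decode(loc, priors, [0.1, 0.2]), gt_boxes, atol=1e-5)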
+
+
+def decode_landm(pre, priors, variances):
+ """Decode landm from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ pre (tensor): landm predictions for loc layers,
+ Shape: [num_priors,10]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded landm predictions
+ """
+ tmp = (
+ priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
+ )
+ landms = torch.cat(tmp, dim=1)
+ return landms
+
+
+def batched_decode(b_loc, priors, variances):
+ """Decode locations from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ b_loc (tensor): location predictions for loc layers,
+ Shape: [num_batches,num_priors,4]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [1,num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded bounding box predictions
+ """
+ boxes = (
+ priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
+ priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
+ )
+ boxes = torch.cat(boxes, dim=2)
+
+ boxes[:, :, :2] -= boxes[:, :, 2:] / 2
+ boxes[:, :, 2:] += boxes[:, :, :2]
+ return boxes
+
+
+def batched_decode_landm(pre, priors, variances):
+ """Decode landm from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ pre (tensor): landm predictions for loc layers,
+ Shape: [num_batches,num_priors,10]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [1,num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded landm predictions
+ """
+ landms = (
+ priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
+ priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
+ priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
+ priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
+ priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
+ )
+ landms = torch.cat(landms, dim=2)
+ return landms
+
+
+def log_sum_exp(x):
+ """Utility function for computing log_sum_exp in a numerically stable way.
+ This will be used to determine unaveraged confidence loss across
+ all examples in a batch.
+ Args:
+ x (Variable(tensor)): conf_preds from conf layers
+ """
+ x_max = x.data.max()
+ return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
+
+
+# Original author: Francisco Massa:
+# https://github.com/fmassa/object-detection.torch
+# Ported to PyTorch by Max deGroot (02/01/2017)
+def nms(boxes, scores, overlap=0.5, top_k=200):
+ """Apply non-maximum suppression at test time to avoid detecting too many
+ overlapping bounding boxes for a given object.
+ Args:
+ boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
+ scores: (tensor) The class prediction scores for the img, Shape:[num_priors].
+ overlap: (float) The overlap thresh for suppressing unnecessary boxes.
+ top_k: (int) The Maximum number of box preds to consider.
+ Return:
+ The indices of the kept boxes with respect to num_priors.
+ """
+
+ keep = torch.Tensor(scores.size(0)).fill_(0).long()
+ if boxes.numel() == 0:
+ return keep
+ x1 = boxes[:, 0]
+ y1 = boxes[:, 1]
+ x2 = boxes[:, 2]
+ y2 = boxes[:, 3]
+ area = torch.mul(x2 - x1, y2 - y1)
+ v, idx = scores.sort(0) # sort in ascending order
+ # I = I[v >= 0.01]
+ idx = idx[-top_k:] # indices of the top-k largest vals
+ xx1 = boxes.new()
+ yy1 = boxes.new()
+ xx2 = boxes.new()
+ yy2 = boxes.new()
+ w = boxes.new()
+ h = boxes.new()
+
+ # keep = torch.Tensor()
+ count = 0
+ while idx.numel() > 0:
+ i = idx[-1] # index of current largest val
+ # keep.append(i)
+ keep[count] = i
+ count += 1
+ if idx.size(0) == 1:
+ break
+ idx = idx[:-1] # remove kept element from view
+ # load bboxes of next highest vals
+ torch.index_select(x1, 0, idx, out=xx1)
+ torch.index_select(y1, 0, idx, out=yy1)
+ torch.index_select(x2, 0, idx, out=xx2)
+ torch.index_select(y2, 0, idx, out=yy2)
+ # store element-wise max with next highest score
+ xx1 = torch.clamp(xx1, min=x1[i])
+ yy1 = torch.clamp(yy1, min=y1[i])
+ xx2 = torch.clamp(xx2, max=x2[i])
+ yy2 = torch.clamp(yy2, max=y2[i])
+ w.resize_as_(xx2)
+ h.resize_as_(yy2)
+ w = xx2 - xx1
+ h = yy2 - yy1
+ # check sizes of xx1 and xx2.. after each iteration
+ w = torch.clamp(w, min=0.0)
+ h = torch.clamp(h, min=0.0)
+ inter = w * h
+ # IoU = i / (area(a) + area(b) - i)
+ rem_areas = torch.index_select(area, 0, idx) # load remaining areas
+ union = (rem_areas - inter) + area[i]
+ IoU = inter / union # store result in iou
+ # keep only elements with an IoU <= overlap
+ idx = idx[IoU.le(overlap)]
+ return keep, count
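+
+
+# Usage sketch for the torchvision-backed NMS above (dets is an [n, 5] float
+# array of x1, y1, x2, y2, score):
+#
+# keep = py_cpu_nms(dets, thresh=0.4)
+# dets = dets[keep]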
diff --git a/r_facelib/detection/yolov5face/__init__.py b/r_facelib/detection/yolov5face/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/r_facelib/detection/yolov5face/__init__.py
diff --git a/r_facelib/detection/yolov5face/face_detector.py b/r_facelib/detection/yolov5face/face_detector.py
new file mode 100644
index 0000000..ca6d8e3
--- /dev/null
+++ b/r_facelib/detection/yolov5face/face_detector.py
@@ -0,0 +1,141 @@
+import copy
+from pathlib import Path
+
+import cv2
+import numpy as np
+import torch
+from torch import torch_version
+
+from r_facelib.detection.yolov5face.models.common import Conv
+from r_facelib.detection.yolov5face.models.yolo import Model
+from r_facelib.detection.yolov5face.utils.datasets import letterbox
+from r_facelib.detection.yolov5face.utils.general import (
+ check_img_size,
+ non_max_suppression_face,
+ scale_coords,
+ scale_coords_landmarks,
+)
+
+print(f"Torch version: {torch.__version__}")
+IS_HIGH_VERSION = torch_version.__version__ >= "1.9.0"
+
+def isListempty(inList):
+ if isinstance(inList, list): # Is a list
+ return all(map(isListempty, inList))
+ return False # Not a list
+
+class YoloDetector:
+ def __init__(
+ self,
+ config_name,
+ min_face=10,
+ target_size=None,
+ device='cuda',
+ ):
+ """
+ config_name: name of .yaml config with network configuration from models/ folder.
+ min_face : minimal face size in pixels.
+ target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
+ None for original resolution.
+ """
+ self._class_path = Path(__file__).parent.absolute()
+ self.target_size = target_size
+ self.min_face = min_face
+ self.detector = Model(cfg=config_name)
+ self.device = device
+
+
+ def _preprocess(self, imgs):
+ """
+ Preprocessing image before passing through the network. Resize and conversion to torch tensor.
+ """
+ pp_imgs = []
+ for img in imgs:
+ h0, w0 = img.shape[:2] # orig hw
+ if self.target_size:
+ r = self.target_size / min(h0, w0) # resize image to img_size
+ if r < 1:
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR)
+
+ imgsz = check_img_size(max(img.shape[:2]), s=self.detector.stride.max()) # check img_size
+ img = letterbox(img, new_shape=imgsz)[0]
+ pp_imgs.append(img)
+ pp_imgs = np.array(pp_imgs)
+ pp_imgs = pp_imgs.transpose(0, 3, 1, 2)
+ pp_imgs = torch.from_numpy(pp_imgs).to(self.device)
+ pp_imgs = pp_imgs.float() # uint8 to fp16/32
+ return pp_imgs / 255.0 # 0 - 255 to 0.0 - 1.0
+
+ def _postprocess(self, imgs, origimgs, pred, conf_thres, iou_thres):
+ """
+ Postprocessing of raw pytorch model output.
+ Returns:
+ bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
+ points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
+ """
+ bboxes = [[] for _ in range(len(origimgs))]
+ landmarks = [[] for _ in range(len(origimgs))]
+
+ pred = non_max_suppression_face(pred, conf_thres, iou_thres)
+
+ for image_id, origimg in enumerate(origimgs):
+ img_shape = origimg.shape
+ image_height, image_width = img_shape[:2]
+ gn = torch.tensor(img_shape)[[1, 0, 1, 0]] # normalization gain whwh
+ gn_lks = torch.tensor(img_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]] # normalization gain landmarks
+ det = pred[image_id].cpu()
+ scale_coords(imgs[image_id].shape[1:], det[:, :4], img_shape).round()
+ scale_coords_landmarks(imgs[image_id].shape[1:], det[:, 5:15], img_shape).round()
+
+ for j in range(det.size()[0]):
+ box = (det[j, :4].view(1, 4) / gn).view(-1).tolist()
+ box = list(
+ map(int, [box[0] * image_width, box[1] * image_height, box[2] * image_width, box[3] * image_height])
+ )
+ if box[3] - box[1] < self.min_face:
+ continue
+ lm = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
+ lm = list(map(int, [i * image_width if j % 2 == 0 else i * image_height for j, i in enumerate(lm)]))
+ lm = [lm[i : i + 2] for i in range(0, len(lm), 2)]
+ bboxes[image_id].append(box)
+ landmarks[image_id].append(lm)
+ return bboxes, landmarks
+
+ def detect_faces(self, imgs, conf_thres=0.7, iou_thres=0.5):
+ """
+ Get bbox coordinates and keypoints of faces on original image.
+ Params:
+ imgs: image or list of images to detect faces on with BGR order (convert to RGB order for inference)
+ conf_thres: confidence threshold for each prediction
+ iou_thres: threshold for NMS (filter of intersecting bboxes)
+ Returns:
+ bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
+ points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
+ """
+ # Pass input images through face detector
+ images = imgs if isinstance(imgs, list) else [imgs]
+ images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
+ origimgs = copy.deepcopy(images)
+
+ images = self._preprocess(images)
+
+ if IS_HIGH_VERSION:
+ with torch.inference_mode(): # for pytorch>=1.9
+ pred = self.detector(images)[0]
+ else:
+ with torch.no_grad(): # for pytorch<1.9
+ pred = self.detector(images)[0]
+
+ bboxes, points = self._postprocess(images, origimgs, pred, conf_thres, iou_thres)
+
+ # return bboxes, points
+ if not isListempty(points):
+ bboxes = np.array(bboxes).reshape(-1,4)
+ points = np.array(points).reshape(-1,10)
+ padding = bboxes[:,0].reshape(-1,1)
+ return np.concatenate((bboxes, padding, points), axis=1)
+ else:
+ return None
+
+ def __call__(self, *args):
+ return self.detect_faces(*args)
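+
+
+# Usage sketch (illustrative; in this repo the detector is normally constructed and
+# its weights loaded by r_facelib.detection.init_yolov5face_model):
+#
+# det = YoloDetector(config_name='models/yolov5n.yaml', device='cpu')
+# result = det.detect_faces(bgr_image) # [n, 15] np.array, or None if no faces found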
diff --git a/r_facelib/detection/yolov5face/models/__init__.py b/r_facelib/detection/yolov5face/models/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/__init__.py
diff --git a/r_facelib/detection/yolov5face/models/common.py b/r_facelib/detection/yolov5face/models/common.py
new file mode 100644
index 0000000..96894d5
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/common.py
@@ -0,0 +1,299 @@
+# This file contains modules common to various models
+
+import math
+
+import numpy as np
+import torch
+from torch import nn
+
+from r_facelib.detection.yolov5face.utils.datasets import letterbox
+from r_facelib.detection.yolov5face.utils.general import (
+ make_divisible,
+ non_max_suppression,
+ scale_coords,
+ xyxy2xywh,
+)
+
+
+def autopad(k, p=None): # kernel, padding
+ # Pad to 'same'
+ if p is None:
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
+ return p
+
+
+def channel_shuffle(x, groups):
+ batchsize, num_channels, height, width = x.data.size()
+ channels_per_group = torch.div(num_channels, groups, rounding_mode="trunc")
+
+ # reshape
+ x = x.view(batchsize, groups, channels_per_group, height, width)
+ x = torch.transpose(x, 1, 2).contiguous()
+
+ # flatten
+ return x.view(batchsize, -1, height, width)
+
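+
+
+# channel_shuffle keeps the tensor shape and only interleaves channels across groups:
+#
+# x = torch.randn(1, 8, 4, 4)
+# assert channel_shuffle(x, groups=2).shape == x.shape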
+
+def DWConv(c1, c2, k=1, s=1, act=True):
+ # Depthwise convolution
+ return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+
+
+class Conv(nn.Module):
+ # Standard convolution
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
+ super().__init__()
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+ self.bn = nn.BatchNorm2d(c2)
+ self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+ def fuseforward(self, x):
+ return self.act(self.conv(x))
+
+
+class StemBlock(nn.Module):
+ def __init__(self, c1, c2, k=3, s=2, p=None, g=1, act=True):
+ super().__init__()
+ self.stem_1 = Conv(c1, c2, k, s, p, g, act)
+ self.stem_2a = Conv(c2, c2 // 2, 1, 1, 0)
+ self.stem_2b = Conv(c2 // 2, c2, 3, 2, 1)
+ self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
+ self.stem_3 = Conv(c2 * 2, c2, 1, 1, 0)
+
+ def forward(self, x):
+ stem_1_out = self.stem_1(x)
+ stem_2a_out = self.stem_2a(stem_1_out)
+ stem_2b_out = self.stem_2b(stem_2a_out)
+ stem_2p_out = self.stem_2p(stem_1_out)
+ return self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1))
+
+
+class Bottleneck(nn.Module):
+ # Standard bottleneck
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
+ self.add = shortcut and c1 == c2
+
+ def forward(self, x):
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class BottleneckCSP(nn.Module):
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
+ self.act = nn.LeakyReLU(0.1, inplace=True)
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+
+ def forward(self, x):
+ y1 = self.cv3(self.m(self.cv1(x)))
+ y2 = self.cv2(x)
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class C3(nn.Module):
+ # CSP Bottleneck with 3 convolutions
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c1, c_, 1, 1)
+ self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+
+ def forward(self, x):
+ return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
+
+
+class ShuffleV2Block(nn.Module):
+ def __init__(self, inp, oup, stride):
+ super().__init__()
+
+ if not 1 <= stride <= 3:
+ raise ValueError("illegal stride value")
+ self.stride = stride
+
+ branch_features = oup // 2
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
+ nn.BatchNorm2d(inp),
+ nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(branch_features),
+ nn.SiLU(),
+ )
+ else:
+ self.branch1 = nn.Sequential()
+
+ self.branch2 = nn.Sequential(
+ nn.Conv2d(
+ inp if (self.stride > 1) else branch_features,
+ branch_features,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False,
+ ),
+ nn.BatchNorm2d(branch_features),
+ nn.SiLU(),
+ self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
+ nn.BatchNorm2d(branch_features),
+ nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(branch_features),
+ nn.SiLU(),
+ )
+
+ @staticmethod
+ def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
+ return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
+
+ def forward(self, x):
+ if self.stride == 1:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+ else:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+ out = channel_shuffle(out, 2)
+ return out
+
+
+class SPP(nn.Module):
+ # Spatial pyramid pooling layer used in YOLOv3-SPP
+ def __init__(self, c1, c2, k=(5, 9, 13)):
+ super().__init__()
+ c_ = c1 // 2 # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+
+ def forward(self, x):
+ x = self.cv1(x)
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+
+
+class Focus(nn.Module):
+ # Focus wh information into c-space
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
+ super().__init__()
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+ return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+
+
+class Concat(nn.Module):
+ # Concatenate a list of tensors along dimension
+ def __init__(self, dimension=1):
+ super().__init__()
+ self.d = dimension
+
+ def forward(self, x):
+ return torch.cat(x, self.d)
+
+
+class NMS(nn.Module):
+ # Non-Maximum Suppression (NMS) module
+ conf = 0.25 # confidence threshold
+ iou = 0.45 # IoU threshold
+ classes = None # (optional list) filter by class
+
+ def forward(self, x):
+ return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
+
+
+class AutoShape(nn.Module):
+ # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+ img_size = 640 # inference size (pixels)
+ conf = 0.25 # NMS confidence threshold
+ iou = 0.45 # NMS IoU threshold
+ classes = None # (optional list) filter by class
+
+ def __init__(self, model):
+ super().__init__()
+ self.model = model.eval()
+
+ def autoshape(self):
+ print("autoShape already enabled, skipping... ") # model already converted to model.autoshape()
+ return self
+
+ def forward(self, imgs, size=640, augment=False, profile=False):
+ # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
+ # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3)
+ # PIL: = Image.open('image.jpg') # HWC x(720,1280,3)
+ # numpy: = np.zeros((720,1280,3)) # HWC
+ # torch: = torch.zeros(16,3,720,1280) # BCHW
+ # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
+
+ p = next(self.model.parameters()) # for device and type
+ if isinstance(imgs, torch.Tensor): # torch
+ return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
+
+ # Pre-process
+ n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
+ shape0, shape1 = [], [] # image and inference shapes
+ for i, im in enumerate(imgs):
+ im = np.array(im) # to numpy
+ if im.shape[0] < 5: # image in CHW
+ im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
+ im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input
+ s = im.shape[:2] # HWC
+ shape0.append(s) # image shape
+ g = size / max(s) # gain
+ shape1.append([y * g for y in s])
+ imgs[i] = im # update
+ shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
+ x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad
+ x = np.stack(x, 0) if n > 1 else x[0][None] # stack
+ x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
+ x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0 # uint8 to fp16/32
+
+ # Inference
+ with torch.no_grad():
+ y = self.model(x, augment, profile)[0] # forward
+ y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
+
+ # Post-process
+ for i in range(n):
+ scale_coords(shape1, y[i][:, :4], shape0[i])
+
+ return Detections(imgs, y, self.names)
+
+
+class Detections:
+ # detections class for YOLOv5 inference results
+ def __init__(self, imgs, pred, names=None):
+ super().__init__()
+ d = pred[0].device # device
+ gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1.0, 1.0], device=d) for im in imgs] # normalizations
+ self.imgs = imgs # list of images as numpy arrays
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
+ self.names = names # class names
+ self.xyxy = pred # xyxy pixels
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
+ self.n = len(self.pred)
+
+ def __len__(self):
+ return self.n
+
+ def tolist(self):
+ # return a list of Detections objects, i.e. 'for result in results.tolist():'
+ x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
+ for d in x:
+ for k in ["imgs", "pred", "xyxy", "xyxyn", "xywh", "xywhn"]:
+ setattr(d, k, getattr(d, k)[0]) # pop out of list
+ return x
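The building blocks above are plain nn.Modules, so their shape behaviour can be checked in isolation. A minimal sketch (not part of the patch), assuming the repository root is on PYTHONPATH so `r_facelib` is importable:

    import torch
    from r_facelib.detection.yolov5face.models.common import Conv, Focus, SPP

    x = torch.randn(1, 3, 64, 64)
    conv = Conv(3, 16, k=3, s=2)      # Conv2d + BatchNorm2d + SiLU, 'same' padding
    focus = Focus(3, 16)              # space-to-depth: (1,3,64,64) -> (1,12,32,32) -> Conv
    spp = SPP(16, 32, k=(5, 9, 13))   # parallel max-pools, concatenated, 16 -> 32 channels
    print(conv(x).shape)              # torch.Size([1, 16, 32, 32])
    print(focus(x).shape)             # torch.Size([1, 16, 32, 32])
    print(spp(conv(x)).shape)         # torch.Size([1, 32, 32, 32])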
diff --git a/r_facelib/detection/yolov5face/models/experimental.py b/r_facelib/detection/yolov5face/models/experimental.py
new file mode 100644
index 0000000..bdf7aea
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/experimental.py
@@ -0,0 +1,45 @@
+# This file contains experimental modules
+
+import numpy as np
+import torch
+from torch import nn
+
+from r_facelib.detection.yolov5face.models.common import Conv
+
+
+class CrossConv(nn.Module):
+ # Cross Convolution Downsample
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+ # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+ super().__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
+ self.add = shortcut and c1 == c2
+
+ def forward(self, x):
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class MixConv2d(nn.Module):
+ # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
+ def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+ super().__init__()
+ groups = len(k)
+ if equal_ch: # equal c_ per group
+ i = torch.linspace(0, groups - 1e-6, c2).floor() # c2 indices
+ c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
+ else: # equal weight.numel() per group
+ b = [c2] + [0] * groups
+ a = np.eye(groups + 1, groups, k=-1)
+ a -= np.roll(a, 1, axis=1)
+ a *= np.array(k) ** 2
+ a[0] = 1
+ c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
+
+ self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
+ self.bn = nn.BatchNorm2d(c2)
+ self.act = nn.LeakyReLU(0.1, inplace=True)
+
+ def forward(self, x):
+ return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
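A small sketch (not part of the patch); note that, as written, MixConv2d's residual add requires the input and output channel counts to match:

    import torch
    from r_facelib.detection.yolov5face.models.experimental import CrossConv, MixConv2d

    x = torch.randn(1, 8, 32, 32)
    cc = CrossConv(8, 8, k=3, s=1, shortcut=True)  # 1xk conv then kx1 conv, with residual add
    mc = MixConv2d(8, 8, k=(1, 3), s=1)            # parallel 1x1 and 3x3 groups, concatenated
    print(cc(x).shape)  # torch.Size([1, 8, 32, 32])
    print(mc(x).shape)  # torch.Size([1, 8, 32, 32])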
diff --git a/r_facelib/detection/yolov5face/models/yolo.py b/r_facelib/detection/yolov5face/models/yolo.py
new file mode 100644
index 0000000..02479dc
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/yolo.py
@@ -0,0 +1,235 @@
+import math
+from copy import deepcopy
+from pathlib import Path
+
+import torch
+import yaml # for torch hub
+from torch import nn
+
+from r_facelib.detection.yolov5face.models.common import (
+ C3,
+ NMS,
+ SPP,
+ AutoShape,
+ Bottleneck,
+ BottleneckCSP,
+ Concat,
+ Conv,
+ DWConv,
+ Focus,
+ ShuffleV2Block,
+ StemBlock,
+)
+from r_facelib.detection.yolov5face.models.experimental import CrossConv, MixConv2d
+from r_facelib.detection.yolov5face.utils.autoanchor import check_anchor_order
+from r_facelib.detection.yolov5face.utils.general import make_divisible
+from r_facelib.detection.yolov5face.utils.torch_utils import copy_attr, fuse_conv_and_bn
+
+
+class Detect(nn.Module):
+ stride = None # strides computed during build
+ export = False # onnx export
+
+ def __init__(self, nc=80, anchors=(), ch=()): # detection layer
+ super().__init__()
+ self.nc = nc # number of classes
+ self.no = nc + 5 + 10 # number of outputs per anchor
+
+ self.nl = len(anchors) # number of detection layers
+ self.na = len(anchors[0]) // 2 # number of anchors
+ self.grid = [torch.zeros(1)] * self.nl # init grid
+ a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+ self.register_buffer("anchors", a) # shape(nl,na,2)
+ self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
+
+ def forward(self, x):
+ z = [] # inference output
+ if self.export:
+ for i in range(self.nl):
+ x[i] = self.m[i](x[i])
+ return x
+ for i in range(self.nl):
+ x[i] = self.m[i](x[i]) # conv
+ bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
+ x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+
+ if not self.training: # inference
+ if self.grid[i].shape[2:4] != x[i].shape[2:4]:
+ self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+
+ y = torch.full_like(x[i], 0)
+ y[..., [0, 1, 2, 3, 4, 15]] = x[i][..., [0, 1, 2, 3, 4, 15]].sigmoid()
+ y[..., 5:15] = x[i][..., 5:15]
+
+ y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
+
+ y[..., 5:7] = (
+ y[..., 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+ ) # landmark x1 y1
+ y[..., 7:9] = (
+ y[..., 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+ ) # landmark x2 y2
+ y[..., 9:11] = (
+ y[..., 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+ ) # landmark x3 y3
+ y[..., 11:13] = (
+ y[..., 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+ ) # landmark x4 y4
+ y[..., 13:15] = (
+ y[..., 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+ ) # landmark x5 y5
+
+ z.append(y.view(bs, -1, self.no))
+
+ return x if self.training else (torch.cat(z, 1), x)
+
+ @staticmethod
+ def _make_grid(nx=20, ny=20):
+ # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij") # for pytorch>=1.10
+ yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+ return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+
+
+class Model(nn.Module):
+ def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None): # model, input channels, number of classes
+ super().__init__()
+ self.yaml_file = Path(cfg).name
+ with Path(cfg).open(encoding="utf8") as f:
+ self.yaml = yaml.safe_load(f) # model dict
+
+ # Define model
+ ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
+ if nc and nc != self.yaml["nc"]:
+ self.yaml["nc"] = nc # override yaml value
+
+ self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
+ self.names = [str(i) for i in range(self.yaml["nc"])] # default names
+
+ # Build strides, anchors
+ m = self.model[-1] # Detect()
+ if isinstance(m, Detect):
+ s = 128 # 2x min stride
+ m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
+ m.anchors /= m.stride.view(-1, 1, 1)
+ check_anchor_order(m)
+ self.stride = m.stride
+ self._initialize_biases() # only run once
+
+ def forward(self, x):
+ return self.forward_once(x) # single-scale inference, train
+
+ def forward_once(self, x):
+ y = [] # outputs
+ for m in self.model:
+ if m.f != -1: # if not from previous layer
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
+
+ x = m(x) # run
+ y.append(x if m.i in self.save else None) # save output
+
+ return x
+
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
+ # https://arxiv.org/abs/1708.02002 section 3.3
+ m = self.model[-1] # Detect() module
+ for mi, s in zip(m.m, m.stride): # from
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
+ b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
+ b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+ def _print_biases(self):
+ m = self.model[-1] # Detect() module
+ for mi in m.m: # from
+ b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
+ print(("%6g Conv2d.bias:" + "%10.3g" * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
+
+ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
+ print("Fusing layers... ")
+ for m in self.model.modules():
+ if isinstance(m, Conv) and hasattr(m, "bn"):
+ m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
+ delattr(m, "bn") # remove batchnorm
+ m.forward = m.fuseforward # update forward
+ elif type(m) is nn.Upsample:
+ m.recompute_scale_factor = None # torch 1.11.0 compatibility
+ return self
+
+ def nms(self, mode=True): # add or remove NMS module
+ present = isinstance(self.model[-1], NMS) # last layer is NMS
+ if mode and not present:
+ print("Adding NMS... ")
+ m = NMS() # module
+ m.f = -1 # from
+ m.i = self.model[-1].i + 1 # index
+ self.model.add_module(name=str(m.i), module=m) # add
+ self.eval()
+ elif not mode and present:
+ print("Removing NMS... ")
+ self.model = self.model[:-1] # remove
+ return self
+
+ def autoshape(self): # add autoShape module
+ print("Adding autoShape... ")
+ m = AutoShape(self) # wrap model
+ copy_attr(m, self, include=("yaml", "nc", "hyp", "names", "stride"), exclude=()) # copy attributes
+ return m
+
+
+def parse_model(d, ch): # model_dict, input_channels(3)
+ anchors, nc, gd, gw = d["anchors"], d["nc"], d["depth_multiple"], d["width_multiple"]
+ na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
+ no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
+
+ layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
+ for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
+ m = eval(m) if isinstance(m, str) else m # eval strings
+ for j, a in enumerate(args):
+ try:
+ args[j] = eval(a) if isinstance(a, str) else a # eval strings
+ except:
+ pass
+
+ n = max(round(n * gd), 1) if n > 1 else n # depth gain
+ if m in [
+ Conv,
+ Bottleneck,
+ SPP,
+ DWConv,
+ MixConv2d,
+ Focus,
+ CrossConv,
+ BottleneckCSP,
+ C3,
+ ShuffleV2Block,
+ StemBlock,
+ ]:
+ c1, c2 = ch[f], args[0]
+
+ c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
+
+ args = [c1, c2, *args[1:]]
+ if m in [BottleneckCSP, C3]:
+ args.insert(2, n)
+ n = 1
+ elif m is nn.BatchNorm2d:
+ args = [ch[f]]
+ elif m is Concat:
+ c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+ elif m is Detect:
+ args.append([ch[x + 1] for x in f])
+ if isinstance(args[1], int): # number of anchors
+ args[1] = [list(range(args[1] * 2))] * len(f)
+ else:
+ c2 = ch[f]
+
+ m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
+ t = str(m)[8:-2].replace("__main__.", "") # module type
+ np = sum(x.numel() for x in m_.parameters()) # number params
+ m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
+ save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
+ layers.append(m_)
+ ch.append(c2)
+ return nn.Sequential(*layers), sorted(save)
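parse_model() assembles the network directly from one of the YAML configs added below, so a shape check only needs a config path. A sketch (not part of the patch), run from the repository root with random weights:

    import torch
    from r_facelib.detection.yolov5face.models.yolo import Model

    cfg = "r_facelib/detection/yolov5face/models/yolov5n.yaml"  # path relative to the repo root
    net = Model(cfg, ch=3, nc=1).eval()      # parse_model() builds backbone + head from the yaml
    x = torch.zeros(1, 3, 640, 640)
    with torch.no_grad():
        pred, feature_maps = net(x)          # eval mode: (concatenated predictions, per-level maps)
    print(pred.shape[-1])                    # 16 = box(4) + objectness(1) + 5 landmarks(10) + class(1)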
diff --git a/r_facelib/detection/yolov5face/models/yolov5l.yaml b/r_facelib/detection/yolov5face/models/yolov5l.yaml
new file mode 100644
index 0000000..98a9e2c
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/yolov5l.yaml
@@ -0,0 +1,47 @@
+# parameters
+nc: 1 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+
+# anchors
+anchors:
+ - [4,5, 8,10, 13,16] # P3/8
+ - [23,29, 43,55, 73,105] # P4/16
+ - [146,217, 231,300, 335,433] # P5/32
+
+# YOLOv5 backbone
+backbone:
+ # [from, number, module, args]
+ [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
+ [-1, 3, C3, [128]],
+ [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
+ [-1, 9, C3, [256]],
+ [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
+ [-1, 9, C3, [512]],
+ [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
+ [-1, 1, SPP, [1024, [3,5,7]]],
+ [-1, 3, C3, [1024, False]], # 8
+ ]
+
+# YOLOv5 head
+head:
+ [[-1, 1, Conv, [512, 1, 1]],
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+ [[-1, 5], 1, Concat, [1]], # cat backbone P4
+ [-1, 3, C3, [512, False]], # 12
+
+ [-1, 1, Conv, [256, 1, 1]],
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+ [[-1, 3], 1, Concat, [1]], # cat backbone P3
+ [-1, 3, C3, [256, False]], # 16 (P3/8-small)
+
+ [-1, 1, Conv, [256, 3, 2]],
+ [[-1, 13], 1, Concat, [1]], # cat head P4
+ [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
+
+ [-1, 1, Conv, [512, 3, 2]],
+ [[-1, 9], 1, Concat, [1]], # cat head P5
+ [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
+
+ [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+ ]
\ No newline at end of file
diff --git a/r_facelib/detection/yolov5face/models/yolov5n.yaml b/r_facelib/detection/yolov5face/models/yolov5n.yaml
new file mode 100644
index 0000000..0a03fb0
--- /dev/null
+++ b/r_facelib/detection/yolov5face/models/yolov5n.yaml
@@ -0,0 +1,45 @@
+# parameters
+nc: 1 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+
+# anchors
+anchors:
+ - [4,5, 8,10, 13,16] # P3/8
+ - [23,29, 43,55, 73,105] # P4/16
+ - [146,217, 231,300, 335,433] # P5/32
+
+# YOLOv5 backbone
+backbone:
+ # [from, number, module, args]
+ [[-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
+ [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
+ [-1, 3, ShuffleV2Block, [128, 1]], # 2
+ [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
+ [-1, 7, ShuffleV2Block, [256, 1]], # 4
+ [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32
+ [-1, 3, ShuffleV2Block, [512, 1]], # 6
+ ]
+
+# YOLOv5 head
+head:
+ [[-1, 1, Conv, [128, 1, 1]],
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+ [[-1, 4], 1, Concat, [1]], # cat backbone P4
+ [-1, 1, C3, [128, False]], # 10
+
+ [-1, 1, Conv, [128, 1, 1]],
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+ [[-1, 2], 1, Concat, [1]], # cat backbone P3
+ [-1, 1, C3, [128, False]], # 14 (P3/8-small)
+
+ [-1, 1, Conv, [128, 3, 2]],
+ [[-1, 11], 1, Concat, [1]], # cat head P4
+ [-1, 1, C3, [128, False]], # 17 (P4/16-medium)
+
+ [-1, 1, Conv, [128, 3, 2]],
+ [[-1, 7], 1, Concat, [1]], # cat head P5
+ [-1, 1, C3, [128, False]], # 20 (P5/32-large)
+
+ [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+ ]
diff --git a/r_facelib/detection/yolov5face/utils/__init__.py b/r_facelib/detection/yolov5face/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/__init__.py
diff --git a/r_facelib/detection/yolov5face/utils/autoanchor.py b/r_facelib/detection/yolov5face/utils/autoanchor.py
new file mode 100644
index 0000000..cb0de89
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/autoanchor.py
@@ -0,0 +1,12 @@
+# Auto-anchor utils
+
+
+def check_anchor_order(m):
+ # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
+ a = m.anchor_grid.prod(-1).view(-1) # anchor area
+ da = a[-1] - a[0] # delta a
+ ds = m.stride[-1] - m.stride[0] # delta s
+ if da.sign() != ds.sign(): # anchor order does not match stride order
+ print("Reversing anchor order")
+ m.anchors[:] = m.anchors.flip(0)
+ m.anchor_grid[:] = m.anchor_grid.flip(0)
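A quick sketch of the check (not part of the patch), using a hypothetical Detect-like stub instead of a real model:

    import torch
    from types import SimpleNamespace
    from r_facelib.detection.yolov5face.utils.autoanchor import check_anchor_order

    # Anchors listed large-to-small while strides go small-to-large, so the order is wrong.
    m = SimpleNamespace(
        anchors=torch.tensor([[[10., 13.]], [[4., 5.]]]),
        anchor_grid=torch.tensor([[[10., 13.]], [[4., 5.]]]).view(2, 1, 1, 1, 1, 2),
        stride=torch.tensor([8., 16.]),
    )
    check_anchor_order(m)   # prints "Reversing anchor order" and flips both buffers
    print(m.anchors[0])     # tensor([[4., 5.]])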
diff --git a/r_facelib/detection/yolov5face/utils/datasets.py b/r_facelib/detection/yolov5face/utils/datasets.py
new file mode 100644
index 0000000..a72609b
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/datasets.py
@@ -0,0 +1,35 @@
+import cv2
+import numpy as np
+
+
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale_fill=False, scaleup=True):
+ # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
+ shape = img.shape[:2] # current shape [height, width]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+
+ # Scale ratio (new / old)
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+ if not scaleup: # only scale down, do not scale up (for better test mAP)
+ r = min(r, 1.0)
+
+ # Compute padding
+ ratio = r, r # width, height ratios
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
+ if auto: # minimum rectangle
+ dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
+ elif scale_fill: # stretch
+ dw, dh = 0.0, 0.0
+ new_unpad = (new_shape[1], new_shape[0])
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
+
+ dw /= 2 # divide padding into 2 sides
+ dh /= 2
+
+ if shape[::-1] != new_unpad: # resize
+ img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+ return img, ratio, (dw, dh)
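letterbox() keeps the aspect ratio and pads the remainder, returning the padded image together with the scale ratio and per-side padding needed later to map detections back. A quick sketch (not part of the patch):

    import numpy as np
    from r_facelib.detection.yolov5face.utils.datasets import letterbox

    img = np.zeros((720, 1280, 3), dtype=np.uint8)        # HWC test frame
    padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=False)
    print(padded.shape, ratio, (dw, dh))                  # (640, 640, 3) (0.5, 0.5) (0.0, 140.0)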
diff --git a/r_facelib/detection/yolov5face/utils/extract_ckpt.py b/r_facelib/detection/yolov5face/utils/extract_ckpt.py
new file mode 100644
index 0000000..e6bde00
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/extract_ckpt.py
@@ -0,0 +1,5 @@
+import torch
+import sys
+sys.path.insert(0,'./facelib/detection/yolov5face')
+model = torch.load('facelib/detection/yolov5face/yolov5n-face.pt', map_location='cpu')['model']
+torch.save(model.state_dict(),'../../models/facedetection')
\ No newline at end of file
diff --git a/r_facelib/detection/yolov5face/utils/general.py b/r_facelib/detection/yolov5face/utils/general.py
new file mode 100644
index 0000000..618d2f3
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/general.py
@@ -0,0 +1,271 @@
+import math
+import time
+
+import numpy as np
+import torch
+import torchvision
+
+
+def check_img_size(img_size, s=32):
+ # Verify img_size is a multiple of stride s
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
+ # if new_size != img_size:
+ # print(f"WARNING: --img-size {img_size:g} must be multiple of max stride {s:g}, updating to {new_size:g}")
+ return new_size
+
+
+def make_divisible(x, divisor):
+ # Return x rounded up to the nearest multiple of divisor
+ return math.ceil(x / divisor) * divisor
+
+
+def xyxy2xywh(x):
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+ y[:, 2] = x[:, 2] - x[:, 0] # width
+ y[:, 3] = x[:, 3] - x[:, 1] # height
+ return y
+
+
+def xywh2xyxy(x):
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
+ return y
+
+
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
+ # Rescale coords (xyxy) from img1_shape to img0_shape
+ if ratio_pad is None: # calculate from img0_shape
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
+ else:
+ gain = ratio_pad[0][0]
+ pad = ratio_pad[1]
+
+ coords[:, [0, 2]] -= pad[0] # x padding
+ coords[:, [1, 3]] -= pad[1] # y padding
+ coords[:, :4] /= gain
+ clip_coords(coords, img0_shape)
+ return coords
+
+
+def clip_coords(boxes, img_shape):
+ # Clip xyxy bounding boxes to image shape (height, width)
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
+
+
+def box_iou(box1, box2):
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments:
+ box1 (Tensor[N, 4])
+ box2 (Tensor[M, 4])
+ Returns:
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
+ IoU values for every element in boxes1 and boxes2
+ """
+
+ def box_area(box):
+ return (box[2] - box[0]) * (box[3] - box[1])
+
+ area1 = box_area(box1.T)
+ area2 = box_area(box2.T)
+
+ inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+ return inter / (area1[:, None] + area2 - inter)
+
+
+def non_max_suppression_face(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
+ """Performs Non-Maximum Suppression (NMS) on inference results
+ Returns:
+ detections with shape: nx16 (x1, y1, x2, y2, conf, 10 landmark coordinates, cls)
+ """
+
+ nc = prediction.shape[2] - 15 # number of classes
+ xc = prediction[..., 4] > conf_thres # candidates
+
+ # Settings
+ # (pixels) maximum box width and height
+ max_wh = 4096
+ time_limit = 10.0 # seconds to quit after
+ redundant = True # require redundant detections
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
+ merge = False # use merge-NMS
+
+ t = time.time()
+ output = [torch.zeros((0, 16), device=prediction.device)] * prediction.shape[0]
+ for xi, x in enumerate(prediction): # image index, image inference
+ # Apply constraints
+ x = x[xc[xi]] # confidence
+
+ # Cat apriori labels if autolabelling
+ if labels and len(labels[xi]):
+ label = labels[xi]
+ v = torch.zeros((len(label), nc + 15), device=x.device)
+ v[:, :4] = label[:, 1:5] # box
+ v[:, 4] = 1.0 # conf
+ v[range(len(label)), label[:, 0].long() + 15] = 1.0 # cls
+ x = torch.cat((x, v), 0)
+
+ # If none remain process next image
+ if not x.shape[0]:
+ continue
+
+ # Compute conf
+ x[:, 15:] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+ box = xywh2xyxy(x[:, :4])
+
+ # Detections matrix nx16 (xyxy, conf, 10 landmark coords, cls)
+ if multi_label:
+ i, j = (x[:, 15:] > conf_thres).nonzero(as_tuple=False).T
+ x = torch.cat((box[i], x[i, j + 15, None], x[:, 5:15], j[:, None].float()), 1)
+ else: # best class only
+ conf, j = x[:, 15:].max(1, keepdim=True)
+ x = torch.cat((box, conf, x[:, 5:15], j.float()), 1)[conf.view(-1) > conf_thres]
+
+ # Filter by class
+ if classes is not None:
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+ # If none remain process next image
+ n = x.shape[0] # number of boxes
+ if not n:
+ continue
+
+ # Batched NMS
+ c = x[:, 15:16] * (0 if agnostic else max_wh) # classes
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
+
+ if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean)
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
+ weights = iou * scores[None] # box weights
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
+ if redundant:
+ i = i[iou.sum(1) > 1] # require redundancy
+
+ output[xi] = x[i]
+ if (time.time() - t) > time_limit:
+ break # time limit exceeded
+
+ return output
+
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
+ """Performs Non-Maximum Suppression (NMS) on inference results
+
+ Returns:
+ detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
+ """
+
+ nc = prediction.shape[2] - 5 # number of classes
+ xc = prediction[..., 4] > conf_thres # candidates
+
+ # Settings
+ # (pixels) maximum box width and height
+ max_wh = 4096
+ time_limit = 10.0 # seconds to quit after
+ redundant = True # require redundant detections
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
+ merge = False # use merge-NMS
+
+ t = time.time()
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+ for xi, x in enumerate(prediction): # image index, image inference
+ x = x[xc[xi]] # confidence
+
+ # Cat apriori labels if autolabelling
+ if labels and len(labels[xi]):
+ label_id = labels[xi]
+ v = torch.zeros((len(label_id), nc + 5), device=x.device)
+ v[:, :4] = label_id[:, 1:5] # box
+ v[:, 4] = 1.0 # conf
+ v[range(len(label_id)), label_id[:, 0].long() + 5] = 1.0 # cls
+ x = torch.cat((x, v), 0)
+
+ # If none remain process next image
+ if not x.shape[0]:
+ continue
+
+ # Compute conf
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+ box = xywh2xyxy(x[:, :4])
+
+ # Detections matrix nx6 (xyxy, conf, cls)
+ if multi_label:
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+ else: # best class only
+ conf, j = x[:, 5:].max(1, keepdim=True)
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+ # Filter by class
+ if classes is not None:
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+ # Check shape
+ n = x.shape[0] # number of boxes
+ if not n: # no boxes
+ continue
+
+ x = x[x[:, 4].argsort(descending=True)] # sort by confidence
+
+ # Batched NMS
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
+ if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean)
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
+ weights = iou * scores[None] # box weights
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
+ if redundant:
+ i = i[iou.sum(1) > 1] # require redundancy
+
+ output[xi] = x[i]
+ if (time.time() - t) > time_limit:
+ print(f"WARNING: NMS time limit {time_limit}s exceeded")
+ break # time limit exceeded
+
+ return output
+
+
+def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
+ # Rescale coords (xyxy) from img1_shape to img0_shape
+ if ratio_pad is None: # calculate from img0_shape
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
+ else:
+ gain = ratio_pad[0][0]
+ pad = ratio_pad[1]
+
+ coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding
+ coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding
+ coords[:, :10] /= gain
+ coords[:, 0].clamp_(0, img0_shape[1]) # x1
+ coords[:, 1].clamp_(0, img0_shape[0]) # y1
+ coords[:, 2].clamp_(0, img0_shape[1]) # x2
+ coords[:, 3].clamp_(0, img0_shape[0]) # y2
+ coords[:, 4].clamp_(0, img0_shape[1]) # x3
+ coords[:, 5].clamp_(0, img0_shape[0]) # y3
+ coords[:, 6].clamp_(0, img0_shape[1]) # x4
+ coords[:, 7].clamp_(0, img0_shape[0]) # y4
+ coords[:, 8].clamp_(0, img0_shape[1]) # x5
+ coords[:, 9].clamp_(0, img0_shape[0]) # y5
+ return coords
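xywh2xyxy and xyxy2xywh above are exact inverses, and make_divisible rounds a size up to the next stride multiple. A short sketch (not part of the patch):

    import numpy as np
    from r_facelib.detection.yolov5face.utils.general import make_divisible, xywh2xyxy, xyxy2xywh

    boxes_xywh = np.array([[320.0, 240.0, 100.0, 50.0]])   # center x, center y, width, height
    boxes_xyxy = xywh2xyxy(boxes_xywh)                     # [[270., 215., 370., 265.]]
    assert np.allclose(xyxy2xywh(boxes_xyxy), boxes_xywh)  # round trip is lossless
    print(make_divisible(641, 32))                         # 672: next multiple of the stride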
diff --git a/r_facelib/detection/yolov5face/utils/torch_utils.py b/r_facelib/detection/yolov5face/utils/torch_utils.py
new file mode 100644
index 0000000..f702962
--- /dev/null
+++ b/r_facelib/detection/yolov5face/utils/torch_utils.py
@@ -0,0 +1,40 @@
+import torch
+from torch import nn
+
+
+def fuse_conv_and_bn(conv, bn):
+ # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+ fusedconv = (
+ nn.Conv2d(
+ conv.in_channels,
+ conv.out_channels,
+ kernel_size=conv.kernel_size,
+ stride=conv.stride,
+ padding=conv.padding,
+ groups=conv.groups,
+ bias=True,
+ )
+ .requires_grad_(False)
+ .to(conv.weight.device)
+ )
+
+ # prepare filters
+ w_conv = conv.weight.clone().view(conv.out_channels, -1)
+ w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
+ fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
+
+ # prepare spatial bias
+ b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
+ b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
+ fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
+
+ return fusedconv
+
+
+def copy_attr(a, b, include=(), exclude=()):
+ # Copy attributes from b to a, options to only include [...] and to exclude [...]
+ for k, v in b.__dict__.items():
+ if (include and k not in include) or k.startswith("_") or k in exclude:
+ continue
+
+ setattr(a, k, v)
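fuse_conv_and_bn() folds the batch-norm affine transform into the convolution weights, so at inference time the fused layer reproduces conv followed by BN. A small numerical check (not part of the patch):

    import torch
    from torch import nn
    from r_facelib.detection.yolov5face.utils.torch_utils import fuse_conv_and_bn

    conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False)
    bn = nn.BatchNorm2d(16).eval()       # eval(): use running statistics, as at inference
    bn.running_mean.uniform_(-1.0, 1.0)  # randomize the stats so the check is non-trivial
    bn.running_var.uniform_(0.5, 1.5)

    fused = fuse_conv_and_bn(conv, bn)
    x = torch.randn(1, 8, 32, 32)
    with torch.no_grad():
        assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)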
diff --git a/r_facelib/parsing/__init__.py b/r_facelib/parsing/__init__.py
new file mode 100644
index 0000000..e5aaa28
--- /dev/null
+++ b/r_facelib/parsing/__init__.py
@@ -0,0 +1,23 @@
+import torch
+
+from r_facelib.utils import load_file_from_url
+from .bisenet import BiSeNet
+from .parsenet import ParseNet
+
+
+def init_parsing_model(model_name='bisenet', half=False, device='cuda'):
+ if model_name == 'bisenet':
+ model = BiSeNet(num_class=19)
+ model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/parsing_bisenet.pth'
+ elif model_name == 'parsenet':
+ model = ParseNet(in_size=512, out_size=512, parsing_ch=19)
+ model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/parsing_parsenet.pth'
+ else:
+ raise NotImplementedError(f'{model_name} is not implemented.')
+
+ model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
+ load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+ model.load_state_dict(load_net, strict=True)
+ model.eval()
+ model = model.to(device)
+ return model
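A usage sketch (not part of the patch). The first call downloads the chosen weights to the model_dir given above; the 512x512 random input is only a placeholder for a face crop, whose actual preprocessing is handled elsewhere in this repo:

    import torch
    from r_facelib.parsing import init_parsing_model

    parser = init_parsing_model(model_name='parsenet', device='cpu')
    face = torch.rand(1, 3, 512, 512)           # placeholder face crop
    with torch.no_grad():
        parsing_logits, rec_img = parser(face)  # ParseNet returns (19-class parsing logits, reconstructed image)
    mask = parsing_logits.argmax(dim=1)         # per-pixel face-region labels, shape (1, 512, 512)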
diff --git a/r_facelib/parsing/bisenet.py b/r_facelib/parsing/bisenet.py
new file mode 100644
index 0000000..9e7a084
--- /dev/null
+++ b/r_facelib/parsing/bisenet.py
@@ -0,0 +1,140 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .resnet import ResNet18
+
+
+class ConvBNReLU(nn.Module):
+
+ def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1):
+ super(ConvBNReLU, self).__init__()
+ self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks, stride=stride, padding=padding, bias=False)
+ self.bn = nn.BatchNorm2d(out_chan)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = F.relu(self.bn(x))
+ return x
+
+
+class BiSeNetOutput(nn.Module):
+
+ def __init__(self, in_chan, mid_chan, num_class):
+ super(BiSeNetOutput, self).__init__()
+ self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
+ self.conv_out = nn.Conv2d(mid_chan, num_class, kernel_size=1, bias=False)
+
+ def forward(self, x):
+ feat = self.conv(x)
+ out = self.conv_out(feat)
+ return out, feat
+
+
+class AttentionRefinementModule(nn.Module):
+
+ def __init__(self, in_chan, out_chan):
+ super(AttentionRefinementModule, self).__init__()
+ self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
+ self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
+ self.bn_atten = nn.BatchNorm2d(out_chan)
+ self.sigmoid_atten = nn.Sigmoid()
+
+ def forward(self, x):
+ feat = self.conv(x)
+ atten = F.avg_pool2d(feat, feat.size()[2:])
+ atten = self.conv_atten(atten)
+ atten = self.bn_atten(atten)
+ atten = self.sigmoid_atten(atten)
+ out = torch.mul(feat, atten)
+ return out
+
+
+class ContextPath(nn.Module):
+
+ def __init__(self):
+ super(ContextPath, self).__init__()
+ self.resnet = ResNet18()
+ self.arm16 = AttentionRefinementModule(256, 128)
+ self.arm32 = AttentionRefinementModule(512, 128)
+ self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+ self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+ self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
+
+ def forward(self, x):
+ feat8, feat16, feat32 = self.resnet(x)
+ h8, w8 = feat8.size()[2:]
+ h16, w16 = feat16.size()[2:]
+ h32, w32 = feat32.size()[2:]
+
+ avg = F.avg_pool2d(feat32, feat32.size()[2:])
+ avg = self.conv_avg(avg)
+ avg_up = F.interpolate(avg, (h32, w32), mode='nearest')
+
+ feat32_arm = self.arm32(feat32)
+ feat32_sum = feat32_arm + avg_up
+ feat32_up = F.interpolate(feat32_sum, (h16, w16), mode='nearest')
+ feat32_up = self.conv_head32(feat32_up)
+
+ feat16_arm = self.arm16(feat16)
+ feat16_sum = feat16_arm + feat32_up
+ feat16_up = F.interpolate(feat16_sum, (h8, w8), mode='nearest')
+ feat16_up = self.conv_head16(feat16_up)
+
+ return feat8, feat16_up, feat32_up # x8, x8, x16
+
+
+class FeatureFusionModule(nn.Module):
+
+ def __init__(self, in_chan, out_chan):
+ super(FeatureFusionModule, self).__init__()
+ self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
+ self.conv1 = nn.Conv2d(out_chan, out_chan // 4, kernel_size=1, stride=1, padding=0, bias=False)
+ self.conv2 = nn.Conv2d(out_chan // 4, out_chan, kernel_size=1, stride=1, padding=0, bias=False)
+ self.relu = nn.ReLU(inplace=True)
+ self.sigmoid = nn.Sigmoid()
+
+ def forward(self, fsp, fcp):
+ fcat = torch.cat([fsp, fcp], dim=1)
+ feat = self.convblk(fcat)
+ atten = F.avg_pool2d(feat, feat.size()[2:])
+ atten = self.conv1(atten)
+ atten = self.relu(atten)
+ atten = self.conv2(atten)
+ atten = self.sigmoid(atten)
+ feat_atten = torch.mul(feat, atten)
+ feat_out = feat_atten + feat
+ return feat_out
+
+
+class BiSeNet(nn.Module):
+
+ def __init__(self, num_class):
+ super(BiSeNet, self).__init__()
+ self.cp = ContextPath()
+ self.ffm = FeatureFusionModule(256, 256)
+ self.conv_out = BiSeNetOutput(256, 256, num_class)
+ self.conv_out16 = BiSeNetOutput(128, 64, num_class)
+ self.conv_out32 = BiSeNetOutput(128, 64, num_class)
+
+ def forward(self, x, return_feat=False):
+ h, w = x.size()[2:]
+ feat_res8, feat_cp8, feat_cp16 = self.cp(x) # return res3b1 feature
+ feat_sp = feat_res8 # replace spatial path feature with res3b1 feature
+ feat_fuse = self.ffm(feat_sp, feat_cp8)
+
+ out, feat = self.conv_out(feat_fuse)
+ out16, feat16 = self.conv_out16(feat_cp8)
+ out32, feat32 = self.conv_out32(feat_cp16)
+
+ out = F.interpolate(out, (h, w), mode='bilinear', align_corners=True)
+ out16 = F.interpolate(out16, (h, w), mode='bilinear', align_corners=True)
+ out32 = F.interpolate(out32, (h, w), mode='bilinear', align_corners=True)
+
+ if return_feat:
+ feat = F.interpolate(feat, (h, w), mode='bilinear', align_corners=True)
+ feat16 = F.interpolate(feat16, (h, w), mode='bilinear', align_corners=True)
+ feat32 = F.interpolate(feat32, (h, w), mode='bilinear', align_corners=True)
+ return out, out16, out32, feat, feat16, feat32
+ else:
+ return out, out16, out32
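BiSeNet returns three 19-class maps (the fused output plus two auxiliary heads), all upsampled back to the input resolution. A shape-only sketch with random weights (not part of the patch); eval() is needed because the attention branches batch-normalize 1x1 feature maps:

    import torch
    from r_facelib.parsing.bisenet import BiSeNet

    net = BiSeNet(num_class=19).eval()   # random weights: shape check only
    x = torch.rand(1, 3, 512, 512)
    with torch.no_grad():
        out, out16, out32 = net(x)
    print(out.shape)                     # torch.Size([1, 19, 512, 512]); out16/out32 match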
diff --git a/r_facelib/parsing/parsenet.py b/r_facelib/parsing/parsenet.py
new file mode 100644
index 0000000..2e80921
--- /dev/null
+++ b/r_facelib/parsing/parsenet.py
@@ -0,0 +1,194 @@
+"""Modified from https://github.com/chaofengc/PSFRGAN
+"""
+import numpy as np
+import torch.nn as nn
+from torch.nn import functional as F
+
+
+class NormLayer(nn.Module):
+ """Normalization Layers.
+
+ Args:
+ channels: input channels, for batch norm and instance norm.
+ input_size: input shape without batch size, for layer norm.
+ """
+
+ def __init__(self, channels, normalize_shape=None, norm_type='bn'):
+ super(NormLayer, self).__init__()
+ norm_type = norm_type.lower()
+ self.norm_type = norm_type
+ if norm_type == 'bn':
+ self.norm = nn.BatchNorm2d(channels, affine=True)
+ elif norm_type == 'in':
+ self.norm = nn.InstanceNorm2d(channels, affine=False)
+ elif norm_type == 'gn':
+ self.norm = nn.GroupNorm(32, channels, affine=True)
+ elif norm_type == 'pixel':
+ self.norm = lambda x: F.normalize(x, p=2, dim=1)
+ elif norm_type == 'layer':
+ self.norm = nn.LayerNorm(normalize_shape)
+ elif norm_type == 'none':
+ self.norm = lambda x: x * 1.0
+ else:
+ assert 1 == 0, f'Norm type {norm_type} not supported.'
+
+ def forward(self, x, ref=None):
+ if self.norm_type == 'spade':
+ return self.norm(x, ref)
+ else:
+ return self.norm(x)
+
+
+class ReluLayer(nn.Module):
+ """Relu Layer.
+
+ Args:
+ relu type: type of relu layer, candidates are
+ - ReLU
+ - LeakyReLU: default relu slope 0.2
+ - PRelu
+ - SELU
+ - none: direct pass
+ """
+
+ def __init__(self, channels, relu_type='relu'):
+ super(ReluLayer, self).__init__()
+ relu_type = relu_type.lower()
+ if relu_type == 'relu':
+ self.func = nn.ReLU(True)
+ elif relu_type == 'leakyrelu':
+ self.func = nn.LeakyReLU(0.2, inplace=True)
+ elif relu_type == 'prelu':
+ self.func = nn.PReLU(channels)
+ elif relu_type == 'selu':
+ self.func = nn.SELU(True)
+ elif relu_type == 'none':
+ self.func = lambda x: x * 1.0
+ else:
+ assert 1 == 0, f'Relu type {relu_type} not supported.'
+
+ def forward(self, x):
+ return self.func(x)
+
+
+class ConvLayer(nn.Module):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=3,
+ scale='none',
+ norm_type='none',
+ relu_type='none',
+ use_pad=True,
+ bias=True):
+ super(ConvLayer, self).__init__()
+ self.use_pad = use_pad
+ self.norm_type = norm_type
+ if norm_type in ['bn']:
+ bias = False
+
+ stride = 2 if scale == 'down' else 1
+
+ self.scale_func = lambda x: x
+ if scale == 'up':
+ self.scale_func = lambda x: nn.functional.interpolate(x, scale_factor=2, mode='nearest')
+
+ self.reflection_pad = nn.ReflectionPad2d(int(np.ceil((kernel_size - 1.) / 2)))
+ self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, bias=bias)
+
+ self.relu = ReluLayer(out_channels, relu_type)
+ self.norm = NormLayer(out_channels, norm_type=norm_type)
+
+ def forward(self, x):
+ out = self.scale_func(x)
+ if self.use_pad:
+ out = self.reflection_pad(out)
+ out = self.conv2d(out)
+ out = self.norm(out)
+ out = self.relu(out)
+ return out
+
+
+class ResidualBlock(nn.Module):
+ """
+ Residual block recommended in: http://torch.ch/blog/2016/02/04/resnets.html
+ """
+
+ def __init__(self, c_in, c_out, relu_type='prelu', norm_type='bn', scale='none'):
+ super(ResidualBlock, self).__init__()
+
+ if scale == 'none' and c_in == c_out:
+ self.shortcut_func = lambda x: x
+ else:
+ self.shortcut_func = ConvLayer(c_in, c_out, 3, scale)
+
+ scale_config_dict = {'down': ['none', 'down'], 'up': ['up', 'none'], 'none': ['none', 'none']}
+ scale_conf = scale_config_dict[scale]
+
+ self.conv1 = ConvLayer(c_in, c_out, 3, scale_conf[0], norm_type=norm_type, relu_type=relu_type)
+ self.conv2 = ConvLayer(c_out, c_out, 3, scale_conf[1], norm_type=norm_type, relu_type='none')
+
+ def forward(self, x):
+ identity = self.shortcut_func(x)
+
+ res = self.conv1(x)
+ res = self.conv2(res)
+ return identity + res
+
+
+class ParseNet(nn.Module):
+
+ def __init__(self,
+ in_size=128,
+ out_size=128,
+ min_feat_size=32,
+ base_ch=64,
+ parsing_ch=19,
+ res_depth=10,
+ relu_type='LeakyReLU',
+ norm_type='bn',
+ ch_range=[32, 256]):
+ super().__init__()
+ self.res_depth = res_depth
+ act_args = {'norm_type': norm_type, 'relu_type': relu_type}
+ min_ch, max_ch = ch_range
+
+ ch_clip = lambda x: max(min_ch, min(x, max_ch)) # noqa: E731
+ min_feat_size = min(in_size, min_feat_size)
+
+ down_steps = int(np.log2(in_size // min_feat_size))
+ up_steps = int(np.log2(out_size // min_feat_size))
+
+ # =============== define encoder-body-decoder ====================
+ self.encoder = []
+ self.encoder.append(ConvLayer(3, base_ch, 3, 1))
+ head_ch = base_ch
+ for i in range(down_steps):
+ cin, cout = ch_clip(head_ch), ch_clip(head_ch * 2)
+ self.encoder.append(ResidualBlock(cin, cout, scale='down', **act_args))
+ head_ch = head_ch * 2
+
+ self.body = []
+ for i in range(res_depth):
+ self.body.append(ResidualBlock(ch_clip(head_ch), ch_clip(head_ch), **act_args))
+
+ self.decoder = []
+ for i in range(up_steps):
+ cin, cout = ch_clip(head_ch), ch_clip(head_ch // 2)
+ self.decoder.append(ResidualBlock(cin, cout, scale='up', **act_args))
+ head_ch = head_ch // 2
+
+ self.encoder = nn.Sequential(*self.encoder)
+ self.body = nn.Sequential(*self.body)
+ self.decoder = nn.Sequential(*self.decoder)
+ self.out_img_conv = ConvLayer(ch_clip(head_ch), 3)
+ self.out_mask_conv = ConvLayer(ch_clip(head_ch), parsing_ch)
+
+ def forward(self, x):
+ feat = self.encoder(x)
+ x = feat + self.body(feat)
+ x = self.decoder(x)
+ out_img = self.out_img_conv(x)
+ out_mask = self.out_mask_conv(x)
+ return out_mask, out_img
diff --git a/r_facelib/parsing/resnet.py b/r_facelib/parsing/resnet.py
new file mode 100644
index 0000000..e7cc283
--- /dev/null
+++ b/r_facelib/parsing/resnet.py
@@ -0,0 +1,69 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
+
+
+class BasicBlock(nn.Module):
+
+ def __init__(self, in_chan, out_chan, stride=1):
+ super(BasicBlock, self).__init__()
+ self.conv1 = conv3x3(in_chan, out_chan, stride)
+ self.bn1 = nn.BatchNorm2d(out_chan)
+ self.conv2 = conv3x3(out_chan, out_chan)
+ self.bn2 = nn.BatchNorm2d(out_chan)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = None
+ if in_chan != out_chan or stride != 1:
+ self.downsample = nn.Sequential(
+ nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(out_chan),
+ )
+
+ def forward(self, x):
+ residual = self.conv1(x)
+ residual = F.relu(self.bn1(residual))
+ residual = self.conv2(residual)
+ residual = self.bn2(residual)
+
+ shortcut = x
+ if self.downsample is not None:
+ shortcut = self.downsample(x)
+
+ out = shortcut + residual
+ out = self.relu(out)
+ return out
+
+
+def create_layer_basic(in_chan, out_chan, bnum, stride=1):
+ layers = [BasicBlock(in_chan, out_chan, stride=stride)]
+ for i in range(bnum - 1):
+ layers.append(BasicBlock(out_chan, out_chan, stride=1))
+ return nn.Sequential(*layers)
+
+
+class ResNet18(nn.Module):
+
+ def __init__(self):
+ super(ResNet18, self).__init__()
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+ self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
+ self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
+ self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
+ self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = F.relu(self.bn1(x))
+ x = self.maxpool(x)
+
+ x = self.layer1(x)
+ feat8 = self.layer2(x) # 1/8
+ feat16 = self.layer3(feat8) # 1/16
+ feat32 = self.layer4(feat16) # 1/32
+ return feat8, feat16, feat32
diff --git a/r_facelib/utils/__init__.py b/r_facelib/utils/__init__.py
new file mode 100644
index 0000000..3397bda
--- /dev/null
+++ b/r_facelib/utils/__init__.py
@@ -0,0 +1,7 @@
+from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_valid_bboxes, paste_face_back
+from .misc import img2tensor, load_file_from_url, download_pretrained_models, scandir
+
+__all__ = [
+ 'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url',
+ 'download_pretrained_models', 'paste_face_back', 'img2tensor', 'scandir'
+]
diff --git a/r_facelib/utils/face_restoration_helper.py b/r_facelib/utils/face_restoration_helper.py
new file mode 100644
index 0000000..1db75c9
--- /dev/null
+++ b/r_facelib/utils/face_restoration_helper.py
@@ -0,0 +1,455 @@
+import cv2
+import numpy as np
+import os
+import torch
+from torchvision.transforms.functional import normalize
+
+from r_facelib.detection import init_detection_model
+from r_facelib.parsing import init_parsing_model
+from r_facelib.utils.misc import img2tensor, imwrite
+
+
+def get_largest_face(det_faces, h, w):
+
+ def get_location(val, length):
+ if val < 0:
+ return 0
+ elif val > length:
+ return length
+ else:
+ return val
+
+ face_areas = []
+ for det_face in det_faces:
+ left = get_location(det_face[0], w)
+ right = get_location(det_face[2], w)
+ top = get_location(det_face[1], h)
+ bottom = get_location(det_face[3], h)
+ face_area = (right - left) * (bottom - top)
+ face_areas.append(face_area)
+ largest_idx = face_areas.index(max(face_areas))
+ return det_faces[largest_idx], largest_idx
+
+
+def get_center_face(det_faces, h=0, w=0, center=None):
+ if center is not None:
+ center = np.array(center)
+ else:
+ center = np.array([w / 2, h / 2])
+ center_dist = []
+ for det_face in det_faces:
+ face_center = np.array([(det_face[0] + det_face[2]) / 2, (det_face[1] + det_face[3]) / 2])
+ dist = np.linalg.norm(face_center - center)
+ center_dist.append(dist)
+ center_idx = center_dist.index(min(center_dist))
+ return det_faces[center_idx], center_idx
+
+
+class FaceRestoreHelper(object):
+ """Helper for the face restoration pipeline (base class)."""
+
+ def __init__(self,
+ upscale_factor,
+ face_size=512,
+ crop_ratio=(1, 1),
+ det_model='retinaface_resnet50',
+ save_ext='png',
+ template_3points=False,
+ pad_blur=False,
+ use_parse=False,
+ device=None):
+ self.template_3points = template_3points # improve robustness
+ self.upscale_factor = upscale_factor
+ # the cropped face ratio based on the square face
+ self.crop_ratio = crop_ratio # (h, w)
+ assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ratio only supports >=1'
+ self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
+
+ if self.template_3points:
+ self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
+ else:
+ # standard 5 landmarks for FFHQ faces with 512 x 512
+ # facexlib
+ self.face_template = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935],
+ [201.26117, 371.41043], [313.08905, 371.15118]])
+
+ # dlib: left_eye: 36:41 right_eye: 42:47 nose: 30,32,33,34 left mouth corner: 48 right mouth corner: 54
+ # self.face_template = np.array([[193.65928, 242.98541], [318.32558, 243.06108], [255.67984, 328.82894],
+ # [198.22603, 372.82502], [313.91018, 372.75659]])
+
+
+ self.face_template = self.face_template * (face_size / 512.0)
+ if self.crop_ratio[0] > 1:
+ self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
+ if self.crop_ratio[1] > 1:
+ self.face_template[:, 0] += face_size * (self.crop_ratio[1] - 1) / 2
+ self.save_ext = save_ext
+ self.pad_blur = pad_blur
+ if self.pad_blur is True:
+ self.template_3points = False
+
+ self.all_landmarks_5 = []
+ self.det_faces = []
+ self.affine_matrices = []
+ self.inverse_affine_matrices = []
+ self.cropped_faces = []
+ self.restored_faces = []
+ self.pad_input_imgs = []
+
+ if device is None:
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ else:
+ self.device = device
+
+ # init face detection model
+ self.face_det = init_detection_model(det_model, half=False, device=self.device)
+
+ # init face parsing model
+ self.use_parse = use_parse
+ self.face_parse = init_parsing_model(model_name='parsenet', device=self.device)
+
+ def set_upscale_factor(self, upscale_factor):
+ self.upscale_factor = upscale_factor
+
+ def read_image(self, img):
+ """img can be image path or cv2 loaded image."""
+ # self.input_img is Numpy array, (h, w, c), BGR, uint8, [0, 255]
+ if isinstance(img, str):
+ img = cv2.imread(img)
+
+ if np.max(img) > 256: # 16-bit image
+ img = img / 65535 * 255
+ if len(img.shape) == 2: # gray image
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ elif img.shape[2] == 4: # BGRA image with alpha channel
+ img = img[:, :, 0:3]
+
+ self.input_img = img
+
+ if min(self.input_img.shape[:2]) < 512:
+ f = 512.0 / min(self.input_img.shape[:2])
+ self.input_img = cv2.resize(self.input_img, (0, 0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)
+
+ def get_face_landmarks_5(self,
+ only_keep_largest=False,
+ only_center_face=False,
+ resize=None,
+ blur_ratio=0.01,
+ eye_dist_threshold=None):
+ if resize is None:
+ scale = 1
+ input_img = self.input_img
+ else:
+ h, w = self.input_img.shape[0:2]
+ scale = resize / min(h, w)
+ scale = max(1, scale) # always scale up
+ h, w = int(h * scale), int(w * scale)
+ interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
+ input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)
+
+ with torch.no_grad():
+ bboxes = self.face_det.detect_faces(input_img)
+
+ if bboxes is None or bboxes.shape[0] == 0:
+ return 0
+ else:
+ bboxes = bboxes / scale
+
+ for bbox in bboxes:
+ # remove faces with too small eye distance: side faces or too small faces
+ eye_dist = np.linalg.norm([bbox[6] - bbox[8], bbox[7] - bbox[9]])
+ if eye_dist_threshold is not None and (eye_dist < eye_dist_threshold):
+ continue
+
+ if self.template_3points:
+ landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 11, 2)])
+ else:
+ landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 15, 2)])
+ self.all_landmarks_5.append(landmark)
+ self.det_faces.append(bbox[0:5])
+
+ if len(self.det_faces) == 0:
+ return 0
+ if only_keep_largest:
+ h, w, _ = self.input_img.shape
+ self.det_faces, largest_idx = get_largest_face(self.det_faces, h, w)
+ self.all_landmarks_5 = [self.all_landmarks_5[largest_idx]]
+ elif only_center_face:
+ h, w, _ = self.input_img.shape
+ self.det_faces, center_idx = get_center_face(self.det_faces, h, w)
+ self.all_landmarks_5 = [self.all_landmarks_5[center_idx]]
+
+ # pad blurry images
+ if self.pad_blur:
+ self.pad_input_imgs = []
+ for landmarks in self.all_landmarks_5:
+ # get landmarks
+ eye_left = landmarks[0, :]
+ eye_right = landmarks[1, :]
+ eye_avg = (eye_left + eye_right) * 0.5
+ mouth_avg = (landmarks[3, :] + landmarks[4, :]) * 0.5
+ eye_to_eye = eye_right - eye_left
+ eye_to_mouth = mouth_avg - eye_avg
+
+ # Get the oriented crop rectangle
+ # x: half width of the oriented crop rectangle
+ x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
+ # - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
+ # norm with the hypotenuse: get the direction
+ x /= np.hypot(*x) # get the hypotenuse of a right triangle
+ rect_scale = 1.5
+ x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
+ # y: half height of the oriented crop rectangle
+ y = np.flipud(x) * [-1, 1]
+
+ # c: center
+ c = eye_avg + eye_to_mouth * 0.1
+ # quad: (left_top, left_bottom, right_bottom, right_top)
+ quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
+ # qsize: side length of the square
+ qsize = np.hypot(*x) * 2
+ border = max(int(np.rint(qsize * 0.1)), 3)
+
+ # get pad
+ # pad: (width_left, height_top, width_right, height_bottom)
+ pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
+ int(np.ceil(max(quad[:, 1]))))
+ pad = [
+ max(-pad[0] + border, 1),
+ max(-pad[1] + border, 1),
+ max(pad[2] - self.input_img.shape[0] + border, 1),
+ max(pad[3] - self.input_img.shape[1] + border, 1)
+ ]
+
+ if max(pad) > 1:
+ # pad image
+ pad_img = np.pad(self.input_img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
+ # modify landmark coords
+ landmarks[:, 0] += pad[0]
+ landmarks[:, 1] += pad[1]
+ # blur pad images
+ h, w, _ = pad_img.shape
+ y, x, _ = np.ogrid[:h, :w, :1]
+ mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
+ np.float32(w - 1 - x) / pad[2]),
+ 1.0 - np.minimum(np.float32(y) / pad[1],
+ np.float32(h - 1 - y) / pad[3]))
+ blur = int(qsize * blur_ratio)
+ if blur % 2 == 0:
+ blur += 1
+ blur_img = cv2.boxFilter(pad_img, 0, ksize=(blur, blur))
+ # blur_img = cv2.GaussianBlur(pad_img, (blur, blur), 0)
+
+ pad_img = pad_img.astype('float32')
+ pad_img += (blur_img - pad_img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
+ pad_img += (np.median(pad_img, axis=(0, 1)) - pad_img) * np.clip(mask, 0.0, 1.0)
+ pad_img = np.clip(pad_img, 0, 255) # float32, [0, 255]
+ self.pad_input_imgs.append(pad_img)
+ else:
+ self.pad_input_imgs.append(np.copy(self.input_img))
+
+ return len(self.all_landmarks_5)
+
+ def align_warp_face(self, save_cropped_path=None, border_mode='constant'):
+ """Align and warp faces with face template.
+ """
+ if self.pad_blur:
+ assert len(self.pad_input_imgs) == len(
+ self.all_landmarks_5), f'Mismatched samples: {len(self.pad_input_imgs)} and {len(self.all_landmarks_5)}'
+ for idx, landmark in enumerate(self.all_landmarks_5):
+ # use 5 landmarks to get affine matrix
+ # use cv2.LMEDS method for the equivalence to skimage transform
+ # ref: https://blog.csdn.net/yichxi/article/details/115827338
+ affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
+ self.affine_matrices.append(affine_matrix)
+ # warp and crop faces
+ if border_mode == 'constant':
+ border_mode = cv2.BORDER_CONSTANT
+ elif border_mode == 'reflect101':
+ border_mode = cv2.BORDER_REFLECT101
+ elif border_mode == 'reflect':
+ border_mode = cv2.BORDER_REFLECT
+ if self.pad_blur:
+ input_img = self.pad_input_imgs[idx]
+ else:
+ input_img = self.input_img
+ cropped_face = cv2.warpAffine(
+ input_img, affine_matrix, self.face_size, borderMode=border_mode, borderValue=(135, 133, 132)) # gray
+ self.cropped_faces.append(cropped_face)
+ # save the cropped face
+ if save_cropped_path is not None:
+ path = os.path.splitext(save_cropped_path)[0]
+ save_path = f'{path}_{idx:02d}.{self.save_ext}'
+ imwrite(cropped_face, save_path)
+
+ def get_inverse_affine(self, save_inverse_affine_path=None):
+ """Get inverse affine matrix."""
+ for idx, affine_matrix in enumerate(self.affine_matrices):
+ inverse_affine = cv2.invertAffineTransform(affine_matrix)
+ inverse_affine *= self.upscale_factor
+ self.inverse_affine_matrices.append(inverse_affine)
+ # save inverse affine matrices
+ if save_inverse_affine_path is not None:
+ path, _ = os.path.splitext(save_inverse_affine_path)
+ save_path = f'{path}_{idx:02d}.pth'
+ torch.save(inverse_affine, save_path)
+
+
+ def add_restored_face(self, face):
+ self.restored_faces.append(face)
+
+
+ def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
+ h, w, _ = self.input_img.shape
+ h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)
+
+ if upsample_img is None:
+ # simply resize the background
+ # upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
+ upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LINEAR)
+ else:
+ upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
+
+ assert len(self.restored_faces) == len(
+            self.inverse_affine_matrices), ('lengths of restored_faces and inverse_affine_matrices are different.')
+
+ inv_mask_borders = []
+ for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
+ if face_upsampler is not None:
+ restored_face = face_upsampler.enhance(restored_face, outscale=self.upscale_factor)[0]
+ inverse_affine /= self.upscale_factor
+ inverse_affine[:, 2] *= self.upscale_factor
+ face_size = (self.face_size[0]*self.upscale_factor, self.face_size[1]*self.upscale_factor)
+ else:
+ # Add an offset to inverse affine matrix, for more precise back alignment
+ if self.upscale_factor > 1:
+ extra_offset = 0.5 * self.upscale_factor
+ else:
+ extra_offset = 0
+ inverse_affine[:, 2] += extra_offset
+ face_size = self.face_size
+ inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
+
+ # if draw_box or not self.use_parse: # use square parse maps
+ # mask = np.ones(face_size, dtype=np.float32)
+ # inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
+ # # remove the black borders
+ # inv_mask_erosion = cv2.erode(
+ # inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
+ # pasted_face = inv_mask_erosion[:, :, None] * inv_restored
+ # total_face_area = np.sum(inv_mask_erosion) # // 3
+ # # add border
+ # if draw_box:
+ # h, w = face_size
+ # mask_border = np.ones((h, w, 3), dtype=np.float32)
+ # border = int(1400/np.sqrt(total_face_area))
+ # mask_border[border:h-border, border:w-border,:] = 0
+ # inv_mask_border = cv2.warpAffine(mask_border, inverse_affine, (w_up, h_up))
+ # inv_mask_borders.append(inv_mask_border)
+ # if not self.use_parse:
+ # # compute the fusion edge based on the area of face
+ # w_edge = int(total_face_area**0.5) // 20
+ # erosion_radius = w_edge * 2
+ # inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
+ # blur_size = w_edge * 2
+ # inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
+ # if len(upsample_img.shape) == 2: # upsample_img is gray image
+ # upsample_img = upsample_img[:, :, None]
+ # inv_soft_mask = inv_soft_mask[:, :, None]
+
+ # always use square mask
+ mask = np.ones(face_size, dtype=np.float32)
+ inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
+ # remove the black borders
+ inv_mask_erosion = cv2.erode(
+ inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
+ pasted_face = inv_mask_erosion[:, :, None] * inv_restored
+ total_face_area = np.sum(inv_mask_erosion) # // 3
+ # add border
+ if draw_box:
+ h, w = face_size
+ mask_border = np.ones((h, w, 3), dtype=np.float32)
+ border = int(1400/np.sqrt(total_face_area))
+ mask_border[border:h-border, border:w-border,:] = 0
+ inv_mask_border = cv2.warpAffine(mask_border, inverse_affine, (w_up, h_up))
+ inv_mask_borders.append(inv_mask_border)
+ # compute the fusion edge based on the area of face
+ w_edge = int(total_face_area**0.5) // 20
+ erosion_radius = w_edge * 2
+ inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
+ blur_size = w_edge * 2
+ inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
+ if len(upsample_img.shape) == 2: # upsample_img is gray image
+ upsample_img = upsample_img[:, :, None]
+ inv_soft_mask = inv_soft_mask[:, :, None]
+
+ # parse mask
+ if self.use_parse:
+ # inference
+ face_input = cv2.resize(restored_face, (512, 512), interpolation=cv2.INTER_LINEAR)
+ face_input = img2tensor(face_input.astype('float32') / 255., bgr2rgb=True, float32=True)
+ normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
+ face_input = torch.unsqueeze(face_input, 0).to(self.device)
+ with torch.no_grad():
+ out = self.face_parse(face_input)[0]
+ out = out.argmax(dim=1).squeeze().cpu().numpy()
+
+ parse_mask = np.zeros(out.shape)
+ MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
+ for idx, color in enumerate(MASK_COLORMAP):
+ parse_mask[out == idx] = color
+ # blur the mask
+ parse_mask = cv2.GaussianBlur(parse_mask, (101, 101), 11)
+ parse_mask = cv2.GaussianBlur(parse_mask, (101, 101), 11)
+ # remove the black borders
+ thres = 10
+ parse_mask[:thres, :] = 0
+ parse_mask[-thres:, :] = 0
+ parse_mask[:, :thres] = 0
+ parse_mask[:, -thres:] = 0
+ parse_mask = parse_mask / 255.
+
+ parse_mask = cv2.resize(parse_mask, face_size)
+ parse_mask = cv2.warpAffine(parse_mask, inverse_affine, (w_up, h_up), flags=3)
+ inv_soft_parse_mask = parse_mask[:, :, None]
+ # pasted_face = inv_restored
+ fuse_mask = (inv_soft_parse_mask<inv_soft_mask).astype('int')
+ inv_soft_mask = inv_soft_parse_mask*fuse_mask + inv_soft_mask*(1-fuse_mask)
+
+ if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4: # alpha channel
+ alpha = upsample_img[:, :, 3:]
+ upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
+ upsample_img = np.concatenate((upsample_img, alpha), axis=2)
+ else:
+ upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img
+
+ if np.max(upsample_img) > 256: # 16-bit image
+ upsample_img = upsample_img.astype(np.uint16)
+ else:
+ upsample_img = upsample_img.astype(np.uint8)
+
+ # draw bounding box
+ if draw_box:
+ # upsample_input_img = cv2.resize(input_img, (w_up, h_up))
+ img_color = np.ones([*upsample_img.shape], dtype=np.float32)
+ img_color[:,:,0] = 0
+ img_color[:,:,1] = 255
+ img_color[:,:,2] = 0
+ for inv_mask_border in inv_mask_borders:
+ upsample_img = inv_mask_border * img_color + (1 - inv_mask_border) * upsample_img
+ # upsample_input_img = inv_mask_border * img_color + (1 - inv_mask_border) * upsample_input_img
+
+ if save_path is not None:
+ path = os.path.splitext(save_path)[0]
+ save_path = f'{path}.{self.save_ext}'
+ imwrite(upsample_img, save_path)
+ return upsample_img
+
+ def clean_all(self):
+ self.all_landmarks_5 = []
+ self.restored_faces = []
+ self.affine_matrices = []
+ self.cropped_faces = []
+ self.inverse_affine_matrices = []
+ self.det_faces = []
+ self.pad_input_imgs = []
diff --git a/r_facelib/utils/face_utils.py b/r_facelib/utils/face_utils.py
new file mode 100644
index 0000000..657ad25
--- /dev/null
+++ b/r_facelib/utils/face_utils.py
@@ -0,0 +1,248 @@
+import cv2
+import numpy as np
+import torch
+
+
+def compute_increased_bbox(bbox, increase_area, preserve_aspect=True):
+ left, top, right, bot = bbox
+ width = right - left
+ height = bot - top
+
+ if preserve_aspect:
+ width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
+ height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))
+ else:
+ width_increase = height_increase = increase_area
+ left = int(left - width_increase * width)
+ top = int(top - height_increase * height)
+ right = int(right + width_increase * width)
+ bot = int(bot + height_increase * height)
+ return (left, top, right, bot)
+
+
+def get_valid_bboxes(bboxes, h, w):
+ left = max(bboxes[0], 0)
+ top = max(bboxes[1], 0)
+ right = min(bboxes[2], w)
+ bottom = min(bboxes[3], h)
+ return (left, top, right, bottom)
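+
+# Hedged usage sketch (not part of the original module): compute_increased_bbox
+# grows a box by a relative margin and get_valid_bboxes clips it to the image.
+# With an assumed 100x100 box inside a 480x640 frame:
+#   bbox = (50, 40, 150, 140)
+#   enlarged = compute_increased_bbox(bbox, increase_area=0.1)  # roughly 10% margin per side
+#   clipped = get_valid_bboxes(enlarged, h=480, w=640)          # keeps the box inside the frame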
+
+
+def align_crop_face_landmarks(img,
+ landmarks,
+ output_size,
+ transform_size=None,
+ enable_padding=True,
+ return_inverse_affine=False,
+ shrink_ratio=(1, 1)):
+ """Align and crop face with landmarks.
+
+ The output_size and transform_size are based on width. The height is
+    adjusted according to the height/width entries of shrink_ratio.
+
+ Modified from:
+ https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py
+
+ Args:
+ img (Numpy array): Input image.
+ landmarks (Numpy array): 5 or 68 or 98 landmarks.
+ output_size (int): Output face size.
+        transform_size (int): Transform size. Usually four times the
+            output_size.
+        enable_padding (bool): Whether to pad the image. Default: True.
+        shrink_ratio (float | tuple[float] | list[float]): Shrink the whole
+ face for height and width (crop larger area). Default: (1, 1).
+
+ Returns:
+ (Numpy array): Cropped face.
+ """
+ lm_type = 'retinaface_5' # Options: dlib_5, retinaface_5
+
+ if isinstance(shrink_ratio, (float, int)):
+ shrink_ratio = (shrink_ratio, shrink_ratio)
+ if transform_size is None:
+ transform_size = output_size * 4
+
+ # Parse landmarks
+ lm = np.array(landmarks)
+ if lm.shape[0] == 5 and lm_type == 'retinaface_5':
+ eye_left = lm[0]
+ eye_right = lm[1]
+ mouth_avg = (lm[3] + lm[4]) * 0.5
+ elif lm.shape[0] == 5 and lm_type == 'dlib_5':
+ lm_eye_left = lm[2:4]
+ lm_eye_right = lm[0:2]
+ eye_left = np.mean(lm_eye_left, axis=0)
+ eye_right = np.mean(lm_eye_right, axis=0)
+ mouth_avg = lm[4]
+ elif lm.shape[0] == 68:
+ lm_eye_left = lm[36:42]
+ lm_eye_right = lm[42:48]
+ eye_left = np.mean(lm_eye_left, axis=0)
+ eye_right = np.mean(lm_eye_right, axis=0)
+ mouth_avg = (lm[48] + lm[54]) * 0.5
+ elif lm.shape[0] == 98:
+ lm_eye_left = lm[60:68]
+ lm_eye_right = lm[68:76]
+ eye_left = np.mean(lm_eye_left, axis=0)
+ eye_right = np.mean(lm_eye_right, axis=0)
+ mouth_avg = (lm[76] + lm[82]) * 0.5
+
+ eye_avg = (eye_left + eye_right) * 0.5
+ eye_to_eye = eye_right - eye_left
+ eye_to_mouth = mouth_avg - eye_avg
+
+ # Get the oriented crop rectangle
+ # x: half width of the oriented crop rectangle
+ x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
+ # - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
+ # norm with the hypotenuse: get the direction
+ x /= np.hypot(*x) # get the hypotenuse of a right triangle
+ rect_scale = 1 # TODO: you can edit it to get larger rect
+ x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
+ # y: half height of the oriented crop rectangle
+ y = np.flipud(x) * [-1, 1]
+
+ x *= shrink_ratio[1] # width
+ y *= shrink_ratio[0] # height
+
+ # c: center
+ c = eye_avg + eye_to_mouth * 0.1
+ # quad: (left_top, left_bottom, right_bottom, right_top)
+ quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
+ # qsize: side length of the square
+ qsize = np.hypot(*x) * 2
+
+ quad_ori = np.copy(quad)
+ # Shrink, for large face
+ # TODO: do we really need shrink
+ shrink = int(np.floor(qsize / output_size * 0.5))
+ if shrink > 1:
+ h, w = img.shape[0:2]
+ rsize = (int(np.rint(float(w) / shrink)), int(np.rint(float(h) / shrink)))
+ img = cv2.resize(img, rsize, interpolation=cv2.INTER_AREA)
+ quad /= shrink
+ qsize /= shrink
+
+ # Crop
+ h, w = img.shape[0:2]
+ border = max(int(np.rint(qsize * 0.1)), 3)
+ crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
+ int(np.ceil(max(quad[:, 1]))))
+ crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, w), min(crop[3] + border, h))
+ if crop[2] - crop[0] < w or crop[3] - crop[1] < h:
+ img = img[crop[1]:crop[3], crop[0]:crop[2], :]
+ quad -= crop[0:2]
+
+ # Pad
+ # pad: (width_left, height_top, width_right, height_bottom)
+ h, w = img.shape[0:2]
+ pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
+ int(np.ceil(max(quad[:, 1]))))
+ pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - w + border, 0), max(pad[3] - h + border, 0))
+ if enable_padding and max(pad) > border - 4:
+ pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
+ img = np.pad(img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
+ h, w = img.shape[0:2]
+ y, x, _ = np.ogrid[:h, :w, :1]
+ mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
+ np.float32(w - 1 - x) / pad[2]),
+ 1.0 - np.minimum(np.float32(y) / pad[1],
+ np.float32(h - 1 - y) / pad[3]))
+ blur = int(qsize * 0.02)
+ if blur % 2 == 0:
+ blur += 1
+ blur_img = cv2.boxFilter(img, 0, ksize=(blur, blur))
+
+ img = img.astype('float32')
+ img += (blur_img - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
+ img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
+ img = np.clip(img, 0, 255) # float32, [0, 255]
+ quad += pad[:2]
+
+ # Transform use cv2
+ h_ratio = shrink_ratio[0] / shrink_ratio[1]
+ dst_h, dst_w = int(transform_size * h_ratio), transform_size
+ template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
+ # use cv2.LMEDS method for the equivalence to skimage transform
+ # ref: https://blog.csdn.net/yichxi/article/details/115827338
+ affine_matrix = cv2.estimateAffinePartial2D(quad, template, method=cv2.LMEDS)[0]
+ cropped_face = cv2.warpAffine(
+ img, affine_matrix, (dst_w, dst_h), borderMode=cv2.BORDER_CONSTANT, borderValue=(135, 133, 132)) # gray
+
+ if output_size < transform_size:
+ cropped_face = cv2.resize(
+ cropped_face, (output_size, int(output_size * h_ratio)), interpolation=cv2.INTER_LINEAR)
+
+ if return_inverse_affine:
+ dst_h, dst_w = int(output_size * h_ratio), output_size
+ template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
+ # use cv2.LMEDS method for the equivalence to skimage transform
+ # ref: https://blog.csdn.net/yichxi/article/details/115827338
+ affine_matrix = cv2.estimateAffinePartial2D(
+ quad_ori, np.array([[0, 0], [0, output_size], [dst_w, dst_h], [dst_w, 0]]), method=cv2.LMEDS)[0]
+ inverse_affine = cv2.invertAffineTransform(affine_matrix)
+ else:
+ inverse_affine = None
+ return cropped_face, inverse_affine
+
+
+def paste_face_back(img, face, inverse_affine):
+ h, w = img.shape[0:2]
+ face_h, face_w = face.shape[0:2]
+ inv_restored = cv2.warpAffine(face, inverse_affine, (w, h))
+ mask = np.ones((face_h, face_w, 3), dtype=np.float32)
+ inv_mask = cv2.warpAffine(mask, inverse_affine, (w, h))
+ # remove the black borders
+ inv_mask_erosion = cv2.erode(inv_mask, np.ones((2, 2), np.uint8))
+ inv_restored_remove_border = inv_mask_erosion * inv_restored
+ total_face_area = np.sum(inv_mask_erosion) // 3
+ # compute the fusion edge based on the area of face
+ w_edge = int(total_face_area**0.5) // 20
+ erosion_radius = w_edge * 2
+ inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
+ blur_size = w_edge * 2
+ inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
+ img = inv_soft_mask * inv_restored_remove_border + (1 - inv_soft_mask) * img
+ # float32, [0, 255]
+ return img
+
+
+if __name__ == '__main__':
+ import os
+
+ from custom_nodes.facerestore.facelib.detection import init_detection_model
+ from custom_nodes.facerestore.facelib.utils.face_restoration_helper import get_largest_face
+
+ img_path = '/home/wxt/datasets/ffhq/ffhq_wild/00009.png'
+    img_name = os.path.splitext(os.path.basename(img_path))[0]
+
+ # initialize model
+ det_net = init_detection_model('retinaface_resnet50', half=False)
+ img_ori = cv2.imread(img_path)
+ h, w = img_ori.shape[0:2]
+ # if larger than 800, scale it
+ scale = max(h / 800, w / 800)
+    if scale > 1:
+        img = cv2.resize(img_ori, (int(w / scale), int(h / scale)), interpolation=cv2.INTER_LINEAR)
+    else:
+        img = img_ori  # keep the original size when no downscaling is needed
+
+ with torch.no_grad():
+ bboxes = det_net.detect_faces(img, 0.97)
+ if scale > 1:
+ bboxes *= scale # the score is incorrect
+ bboxes = get_largest_face(bboxes, h, w)[0]
+
+ landmarks = np.array([[bboxes[i], bboxes[i + 1]] for i in range(5, 15, 2)])
+
+ cropped_face, inverse_affine = align_crop_face_landmarks(
+ img_ori,
+ landmarks,
+ output_size=512,
+ transform_size=None,
+ enable_padding=True,
+ return_inverse_affine=True,
+ shrink_ratio=(1, 1))
+
+    cv2.imwrite(f'tmp/{img_name}_cropped_face.png', cropped_face)
+ img = paste_face_back(img_ori, cropped_face, inverse_affine)
+ cv2.imwrite(f'tmp/{img_name}_back.png', img)
diff --git a/r_facelib/utils/misc.py b/r_facelib/utils/misc.py
new file mode 100644
index 0000000..6ea7c65
--- /dev/null
+++ b/r_facelib/utils/misc.py
@@ -0,0 +1,143 @@
+import cv2
+import os
+import os.path as osp
+import torch
+from torch.hub import download_url_to_file, get_dir
+from urllib.parse import urlparse
+# from basicsr.utils.download_util import download_file_from_google_drive
+#import gdown
+
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+def download_pretrained_models(file_ids, save_path_root):
+ os.makedirs(save_path_root, exist_ok=True)
+
+ for file_name, file_id in file_ids.items():
+ file_url = 'https://drive.google.com/uc?id='+file_id
+ save_path = osp.abspath(osp.join(save_path_root, file_name))
+ if osp.exists(save_path):
+            user_response = input(f'{file_name} already exists. Do you want to overwrite it? Y/N\n')
+            if user_response.lower() == 'y':
+                print(f'Overwriting {file_name} at {save_path}')
+ print("skipping gdown in facelib/utils/misc.py "+file_url)
+ #gdown.download(file_url, save_path, quiet=False)
+ # download_file_from_google_drive(file_id, save_path)
+ elif user_response.lower() == 'n':
+ print(f'Skipping {file_name}')
+ else:
+ raise ValueError('Wrong input. Only accepts Y/N.')
+ else:
+ print(f'Downloading {file_name} to {save_path}')
+ print("skipping gdown in facelib/utils/misc.py "+file_url)
+ #gdown.download(file_url, save_path, quiet=False)
+ # download_file_from_google_drive(file_id, save_path)
+
+
+def imwrite(img, file_path, params=None, auto_mkdir=True):
+ """Write image to file.
+
+ Args:
+ img (ndarray): Image array to be written.
+ file_path (str): Image file path.
+ params (None or list): Same as opencv's :func:`imwrite` interface.
+ auto_mkdir (bool): If the parent folder of `file_path` does not exist,
+ whether to create it automatically.
+
+ Returns:
+ bool: Successful or not.
+ """
+ if auto_mkdir:
+ dir_name = os.path.abspath(os.path.dirname(file_path))
+ os.makedirs(dir_name, exist_ok=True)
+ return cv2.imwrite(file_path, img, params)
+
+
+def img2tensor(imgs, bgr2rgb=True, float32=True):
+ """Numpy array to tensor.
+
+ Args:
+ imgs (list[ndarray] | ndarray): Input images.
+ bgr2rgb (bool): Whether to change bgr to rgb.
+ float32 (bool): Whether to change to float32.
+
+ Returns:
+ list[tensor] | tensor: Tensor images. If returned results only have
+ one element, just return tensor.
+ """
+
+ def _totensor(img, bgr2rgb, float32):
+ if img.shape[2] == 3 and bgr2rgb:
+ if img.dtype == 'float64':
+ img = img.astype('float32')
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = torch.from_numpy(img.transpose(2, 0, 1))
+ if float32:
+ img = img.float()
+ return img
+
+ if isinstance(imgs, list):
+ return [_totensor(img, bgr2rgb, float32) for img in imgs]
+ else:
+ return _totensor(imgs, bgr2rgb, float32)
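+
+# Hedged usage sketch (illustrative only; the file path is an assumption):
+#   bgr = cv2.imread('face.png')                                        # (H, W, 3), uint8, BGR
+#   t = img2tensor(bgr.astype('float32') / 255., bgr2rgb=True, float32=True)
+#   print(t.shape)                                                      # torch.Size([3, H, W]), RGB, float32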
+
+
+def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
+ """Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py
+ """
+ if model_dir is None:
+ hub_dir = get_dir()
+ model_dir = os.path.join(hub_dir, 'checkpoints')
+
+ os.makedirs(os.path.join(ROOT_DIR, model_dir), exist_ok=True)
+
+ parts = urlparse(url)
+ filename = os.path.basename(parts.path)
+ if file_name is not None:
+ filename = file_name
+ cached_file = os.path.abspath(os.path.join(ROOT_DIR, model_dir, filename))
+ if not os.path.exists(cached_file):
+ print(f'Downloading: "{url}" to {cached_file}\n')
+ download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
+ return cached_file
+
+
+def scandir(dir_path, suffix=None, recursive=False, full_path=False):
+ """Scan a directory to find the interested files.
+ Args:
+ dir_path (str): Path of the directory.
+ suffix (str | tuple(str), optional): File suffix that we are
+ interested in. Default: None.
+ recursive (bool, optional): If set to True, recursively scan the
+ directory. Default: False.
+ full_path (bool, optional): If set to True, include the dir_path.
+ Default: False.
+ Returns:
+        A generator for the matching file paths (relative unless full_path is True).
+ """
+
+ if (suffix is not None) and not isinstance(suffix, (str, tuple)):
+ raise TypeError('"suffix" must be a string or tuple of strings')
+
+ root = dir_path
+
+ def _scandir(dir_path, suffix, recursive):
+ for entry in os.scandir(dir_path):
+ if not entry.name.startswith('.') and entry.is_file():
+ if full_path:
+ return_path = entry.path
+ else:
+ return_path = osp.relpath(entry.path, root)
+
+ if suffix is None:
+ yield return_path
+ elif return_path.endswith(suffix):
+ yield return_path
+ else:
+ if recursive:
+ yield from _scandir(entry.path, suffix=suffix, recursive=recursive)
+ else:
+ continue
+
+ return _scandir(dir_path, suffix=suffix, recursive=recursive)
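+
+# Hedged usage sketch: scandir returns a generator, so wrap it in list() to
+# materialise the matches. The directory and suffix below are assumptions.
+#   pth_files = list(scandir('weights', suffix='.pth', recursive=True))
+#   # paths are relative to 'weights' unless full_path=True is passed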
diff --git a/reactor_patcher.py b/reactor_patcher.py
new file mode 100644
index 0000000..1818def
--- /dev/null
+++ b/reactor_patcher.py
@@ -0,0 +1,135 @@
+import os.path as osp
+import glob
+import logging
+import insightface
+from insightface.model_zoo.model_zoo import ModelRouter, PickableInferenceSession
+from insightface.model_zoo.retinaface import RetinaFace
+from insightface.model_zoo.landmark import Landmark
+from insightface.model_zoo.attribute import Attribute
+from insightface.model_zoo.inswapper import INSwapper
+from insightface.model_zoo.arcface_onnx import ArcFaceONNX
+from insightface.app import FaceAnalysis
+from insightface.utils import DEFAULT_MP_NAME, ensure_available
+from insightface.model_zoo import model_zoo
+import onnxruntime
+import onnx
+from onnx import numpy_helper
+from scripts.reactor_logger import logger
+
+
+def patched_get_model(self, **kwargs):
+ session = PickableInferenceSession(self.onnx_file, **kwargs)
+ inputs = session.get_inputs()
+ input_cfg = inputs[0]
+ input_shape = input_cfg.shape
+ outputs = session.get_outputs()
+
+ if len(outputs) >= 5:
+ return RetinaFace(model_file=self.onnx_file, session=session)
+ elif input_shape[2] == 192 and input_shape[3] == 192:
+ return Landmark(model_file=self.onnx_file, session=session)
+ elif input_shape[2] == 96 and input_shape[3] == 96:
+ return Attribute(model_file=self.onnx_file, session=session)
+ elif len(inputs) == 2 and input_shape[2] == 128 and input_shape[3] == 128:
+ return INSwapper(model_file=self.onnx_file, session=session)
+ elif input_shape[2] == input_shape[3] and input_shape[2] >= 112 and input_shape[2] % 16 == 0:
+ return ArcFaceONNX(model_file=self.onnx_file, session=session)
+ else:
+ return None
+
+
+def patched_faceanalysis_init(self, name=DEFAULT_MP_NAME, root='~/.insightface', allowed_modules=None, **kwargs):
+ onnxruntime.set_default_logger_severity(3)
+ self.models = {}
+ self.model_dir = ensure_available('models', name, root=root)
+ onnx_files = glob.glob(osp.join(self.model_dir, '*.onnx'))
+ onnx_files = sorted(onnx_files)
+ for onnx_file in onnx_files:
+ model = model_zoo.get_model(onnx_file, **kwargs)
+ if model is None:
+ print('model not recognized:', onnx_file)
+ elif allowed_modules is not None and model.taskname not in allowed_modules:
+ print('model ignore:', onnx_file, model.taskname)
+ del model
+ elif model.taskname not in self.models and (allowed_modules is None or model.taskname in allowed_modules):
+ self.models[model.taskname] = model
+ else:
+ print('duplicated model task type, ignore:', onnx_file, model.taskname)
+ del model
+ assert 'detection' in self.models
+ self.det_model = self.models['detection']
+
+
+def patched_faceanalysis_prepare(self, ctx_id, det_thresh=0.5, det_size=(640, 640)):
+ self.det_thresh = det_thresh
+ assert det_size is not None
+ self.det_size = det_size
+ for taskname, model in self.models.items():
+ if taskname == 'detection':
+ model.prepare(ctx_id, input_size=det_size, det_thresh=det_thresh)
+ else:
+ model.prepare(ctx_id)
+
+
+def patched_inswapper_init(self, model_file=None, session=None):
+ self.model_file = model_file
+ self.session = session
+ model = onnx.load(self.model_file)
+ graph = model.graph
+ self.emap = numpy_helper.to_array(graph.initializer[-1])
+ self.input_mean = 0.0
+ self.input_std = 255.0
+ if self.session is None:
+ self.session = onnxruntime.InferenceSession(self.model_file, None)
+ inputs = self.session.get_inputs()
+ self.input_names = []
+ for inp in inputs:
+ self.input_names.append(inp.name)
+ outputs = self.session.get_outputs()
+ output_names = []
+ for out in outputs:
+ output_names.append(out.name)
+ self.output_names = output_names
+ assert len(self.output_names) == 1
+ input_cfg = inputs[0]
+ input_shape = input_cfg.shape
+ self.input_shape = input_shape
+ self.input_size = tuple(input_shape[2:4][::-1])
+
+
+def patched_retinaface_prepare(self, ctx_id, **kwargs):
+ if ctx_id<0:
+ self.session.set_providers(['CPUExecutionProvider'])
+ nms_thresh = kwargs.get('nms_thresh', None)
+ if nms_thresh is not None:
+ self.nms_thresh = nms_thresh
+ det_thresh = kwargs.get('det_thresh', None)
+ if det_thresh is not None:
+ self.det_thresh = det_thresh
+ input_size = kwargs.get('input_size', None)
+ if input_size is not None and self.input_size is None:
+ self.input_size = input_size
+
+
+def patch_insightface(get_model, faceanalysis_init, faceanalysis_prepare, inswapper_init, retinaface_prepare):
+ insightface.model_zoo.model_zoo.ModelRouter.get_model = get_model
+ insightface.app.FaceAnalysis.__init__ = faceanalysis_init
+ insightface.app.FaceAnalysis.prepare = faceanalysis_prepare
+ insightface.model_zoo.inswapper.INSwapper.__init__ = inswapper_init
+ insightface.model_zoo.retinaface.RetinaFace.prepare = retinaface_prepare
+
+
+original_functions = [ModelRouter.get_model, FaceAnalysis.__init__, FaceAnalysis.prepare, INSwapper.__init__, RetinaFace.prepare]
+patched_functions = [patched_get_model, patched_faceanalysis_init, patched_faceanalysis_prepare, patched_inswapper_init, patched_retinaface_prepare]
+
+
+def apply_patch(console_log_level):
+ if console_log_level == 0:
+ patch_insightface(*patched_functions)
+ logger.setLevel(logging.WARNING)
+ elif console_log_level == 1:
+ patch_insightface(*patched_functions)
+ logger.setLevel(logging.STATUS)
+ elif console_log_level == 2:
+ patch_insightface(*original_functions)
+ logger.setLevel(logging.INFO)
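+
+# Hedged usage sketch: apply_patch() both monkey-patches insightface and sets the
+# ReActor logger level; levels 0/1 install the quiet patched functions, while 2
+# restores the originals and logs at INFO.
+#   from reactor_patcher import apply_patch
+#   apply_patch(1)  # assumed typical setting: patched insightface, STATUS-level logging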
diff --git a/reactor_utils.py b/reactor_utils.py
new file mode 100644
index 0000000..bf16325
--- /dev/null
+++ b/reactor_utils.py
@@ -0,0 +1,231 @@
+import os
+from PIL import Image
+import numpy as np
+import torch
+from torchvision.utils import make_grid
+import cv2
+import math
+import logging
+import hashlib
+from insightface.app.common import Face
+from safetensors.torch import save_file, safe_open
+from tqdm import tqdm
+import urllib.request
+import onnxruntime
+from typing import Any
+import folder_paths
+
+ORT_SESSION = None
+
+def tensor_to_pil(img_tensor, batch_index=0):
+    # Convert tensor of shape [batch_size, height, width, channels] at the batch_index to PIL Image
+ img_tensor = img_tensor[batch_index].unsqueeze(0)
+ i = 255. * img_tensor.cpu().numpy()
+ img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8).squeeze())
+ return img
+
+
+def batch_tensor_to_pil(img_tensor):
+    # Convert tensor of shape [batch_size, height, width, channels] to a list of PIL Images
+ return [tensor_to_pil(img_tensor, i) for i in range(img_tensor.shape[0])]
+
+
+def pil_to_tensor(image):
+ # Takes a PIL image and returns a tensor of shape [1, height, width, channels]
+ image = np.array(image).astype(np.float32) / 255.0
+ image = torch.from_numpy(image).unsqueeze(0)
+ if len(image.shape) == 3: # If the image is grayscale, add a channel dimension
+ image = image.unsqueeze(-1)
+ return image
+
+
+def batched_pil_to_tensor(images):
+ # Takes a list of PIL images and returns a tensor of shape [batch_size, height, width, channels]
+ return torch.cat([pil_to_tensor(image) for image in images], dim=0)
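+
+# Hedged round-trip sketch (illustrative only): image batches are assumed to be
+# [batch, height, width, channels] in [0, 1], so the two helpers invert each other.
+#   pil_images = batch_tensor_to_pil(image_tensor)          # tensor batch -> list of PIL images
+#   image_tensor_again = batched_pil_to_tensor(pil_images)  # list of PIL images -> tensor batch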
+
+
+def img2tensor(imgs, bgr2rgb=True, float32=True):
+
+ def _totensor(img, bgr2rgb, float32):
+ if img.shape[2] == 3 and bgr2rgb:
+ if img.dtype == 'float64':
+ img = img.astype('float32')
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ img = torch.from_numpy(img.transpose(2, 0, 1))
+ if float32:
+ img = img.float()
+ return img
+
+ if isinstance(imgs, list):
+ return [_totensor(img, bgr2rgb, float32) for img in imgs]
+ else:
+ return _totensor(imgs, bgr2rgb, float32)
+
+
+def tensor2img(tensor, rgb2bgr=True, out_type=np.uint8, min_max=(0, 1)):
+
+ if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
+ raise TypeError(f'tensor or list of tensors expected, got {type(tensor)}')
+
+ if torch.is_tensor(tensor):
+ tensor = [tensor]
+ result = []
+ for _tensor in tensor:
+ _tensor = _tensor.squeeze(0).float().detach().cpu().clamp_(*min_max)
+ _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0])
+
+ n_dim = _tensor.dim()
+ if n_dim == 4:
+ img_np = make_grid(_tensor, nrow=int(math.sqrt(_tensor.size(0))), normalize=False).numpy()
+ img_np = img_np.transpose(1, 2, 0)
+ if rgb2bgr:
+ img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+ elif n_dim == 3:
+ img_np = _tensor.numpy()
+ img_np = img_np.transpose(1, 2, 0)
+ if img_np.shape[2] == 1: # gray image
+ img_np = np.squeeze(img_np, axis=2)
+ else:
+ if rgb2bgr:
+ img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+ elif n_dim == 2:
+ img_np = _tensor.numpy()
+ else:
+ raise TypeError('Only support 4D, 3D or 2D tensor. ' f'But received with dimension: {n_dim}')
+ if out_type == np.uint8:
+            # Unlike MATLAB, numpy.uint8() WILL NOT round by default.
+ img_np = (img_np * 255.0).round()
+ img_np = img_np.astype(out_type)
+ result.append(img_np)
+ if len(result) == 1:
+ result = result[0]
+ return result
+
+
+def rgba2rgb_tensor(rgba):
+ r = rgba[...,0]
+ g = rgba[...,1]
+ b = rgba[...,2]
+ return torch.stack([r, g, b], dim=3)
+
+
+def download(url, path, name):
+ request = urllib.request.urlopen(url)
+ total = int(request.headers.get('Content-Length', 0))
+ with tqdm(total=total, desc=f'[ReActor] Downloading {name} to {path}', unit='B', unit_scale=True, unit_divisor=1024) as progress:
+ urllib.request.urlretrieve(url, path, reporthook=lambda count, block_size, total_size: progress.update(block_size))
+
+
+def move_path(old_path, new_path):
+ if os.path.exists(old_path):
+ try:
+ models = os.listdir(old_path)
+ for model in models:
+ move_old_path = os.path.join(old_path, model)
+ move_new_path = os.path.join(new_path, model)
+ os.rename(move_old_path, move_new_path)
+ os.rmdir(old_path)
+ except Exception as e:
+ print(f"Error: {e}")
+ new_path = old_path
+
+
+def addLoggingLevel(levelName, levelNum, methodName=None):
+ if not methodName:
+ methodName = levelName.lower()
+
+ def logForLevel(self, message, *args, **kwargs):
+ if self.isEnabledFor(levelNum):
+ self._log(levelNum, message, args, **kwargs)
+
+ def logToRoot(message, *args, **kwargs):
+ logging.log(levelNum, message, *args, **kwargs)
+
+ logging.addLevelName(levelNum, levelName)
+ setattr(logging, levelName, levelNum)
+ setattr(logging.getLoggerClass(), methodName, logForLevel)
+ setattr(logging, methodName, logToRoot)
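+
+# Hedged usage sketch: addLoggingLevel registers a custom level on the logging
+# module and logger class; the STATUS level used elsewhere in this package is
+# assumed to be created roughly like this.
+#   addLoggingLevel('STATUS', logging.INFO + 5)
+#   logging.getLogger('ReActor').status('swap finished')  # method name defaults to the lower-cased level name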
+
+
+def get_image_md5hash(image: Image.Image):
+ md5hash = hashlib.md5(image.tobytes())
+ return md5hash.hexdigest()
+
+
+def save_face_model(face: Face, filename: str) -> None:
+ try:
+ tensors = {
+ "bbox": torch.tensor(face["bbox"]),
+ "kps": torch.tensor(face["kps"]),
+ "det_score": torch.tensor(face["det_score"]),
+ "landmark_3d_68": torch.tensor(face["landmark_3d_68"]),
+ "pose": torch.tensor(face["pose"]),
+ "landmark_2d_106": torch.tensor(face["landmark_2d_106"]),
+ "embedding": torch.tensor(face["embedding"]),
+ "gender": torch.tensor(face["gender"]),
+ "age": torch.tensor(face["age"]),
+ }
+ save_file(tensors, filename)
+ print(f"Face model has been saved to '{filename}'")
+ except Exception as e:
+ print(f"Error: {e}")
+
+
+def load_face_model(filename: str):
+ face = {}
+ with safe_open(filename, framework="pt") as f:
+ for k in f.keys():
+ face[k] = f.get_tensor(k).numpy()
+ return Face(face)
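+
+# Hedged usage sketch: the two helpers round-trip an insightface Face through a
+# .safetensors file (the path below is an assumption).
+#   save_face_model(face, 'models/reactor/faces/person.safetensors')
+#   restored = load_face_model('models/reactor/faces/person.safetensors')
+#   # restored behaves like a detected Face: restored.embedding, restored.kps, ...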
+
+
+def get_ort_session():
+ global ORT_SESSION
+ return ORT_SESSION
+
+def set_ort_session(model_path, providers) -> Any:
+ global ORT_SESSION
+ onnxruntime.set_default_logger_severity(3)
+ ORT_SESSION = onnxruntime.InferenceSession(model_path, providers=providers)
+ return ORT_SESSION
+
+def clear_ort_session() -> None:
+ global ORT_SESSION
+ ORT_SESSION = None
+
+def prepare_cropped_face(cropped_face):
+ cropped_face = cropped_face[:, :, ::-1] / 255.0
+ cropped_face = (cropped_face - 0.5) / 0.5
+ cropped_face = np.expand_dims(cropped_face.transpose(2, 0, 1), axis = 0).astype(np.float32)
+ return cropped_face
+
+def normalize_cropped_face(cropped_face):
+ cropped_face = np.clip(cropped_face, -1, 1)
+ cropped_face = (cropped_face + 1) / 2
+ cropped_face = cropped_face.transpose(1, 2, 0)
+ cropped_face = (cropped_face * 255.0).round()
+ cropped_face = cropped_face.astype(np.uint8)[:, :, ::-1]
+ return cropped_face
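+
+# Hedged sketch of the intended round trip (e.g. around an ONNX face restorer;
+# the session input name below is an assumption):
+#   blob = prepare_cropped_face(cropped_face_bgr_uint8)    # (1, 3, H, W), float32, [-1, 1], RGB
+#   out = ort_session.run(None, {'input': blob})[0][0]     # (3, H, W) in [-1, 1]
+#   restored_bgr_uint8 = normalize_cropped_face(out)       # back to (H, W, 3), uint8, BGR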
+
+
+# author: Trung0246 --->
+def add_folder_path_and_extensions(folder_name, full_folder_paths, extensions):
+ # Iterate over the list of full folder paths
+ for full_folder_path in full_folder_paths:
+ # Use the provided function to add each model folder path
+ folder_paths.add_model_folder_path(folder_name, full_folder_path)
+
+ # Now handle the extensions. If the folder name already exists, update the extensions
+ if folder_name in folder_paths.folder_names_and_paths:
+ # Unpack the current paths and extensions
+ current_paths, current_extensions = folder_paths.folder_names_and_paths[folder_name]
+ # Update the extensions set with the new extensions
+ updated_extensions = current_extensions | extensions
+ # Reassign the updated tuple back to the dictionary
+ folder_paths.folder_names_and_paths[folder_name] = (current_paths, updated_extensions)
+ else:
+ # If the folder name was not present, add_model_folder_path would have added it with the last path
+ # Now we just need to update the set of extensions as it would be an empty set
+ # Also ensure that all paths are included (since add_model_folder_path adds only one path at a time)
+ folder_paths.folder_names_and_paths[folder_name] = (full_folder_paths, extensions)
+# <---
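+
+# Hedged usage sketch (the folder key and path are assumptions): register an extra
+# model folder with ComfyUI and make sure the extension set covers the formats used here.
+#   add_folder_path_and_extensions(
+#       "reactor_models",
+#       [os.path.join(folder_paths.models_dir, "reactor")],
+#       {".onnx", ".pth"},
+#   )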
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d6d85e9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+albumentations>=1.4.16
+insightface==0.7.3
+onnx>=1.14.0
+opencv-python>=4.7.0.72
+numpy==1.26.3
+segment_anything
+ultralytics
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/__init__.py
diff --git a/scripts/r_archs/__init__.py b/scripts/r_archs/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/r_archs/__init__.py
diff --git a/scripts/r_archs/codeformer_arch.py b/scripts/r_archs/codeformer_arch.py
new file mode 100644
index 0000000..588ef69
--- /dev/null
+++ b/scripts/r_archs/codeformer_arch.py
@@ -0,0 +1,278 @@
+import math
+import numpy as np
+import torch
+from torch import nn, Tensor
+import torch.nn.functional as F
+from typing import Optional, List
+
+from scripts.r_archs.vqgan_arch import *
+from r_basicsr.utils import get_root_logger
+from r_basicsr.utils.registry import ARCH_REGISTRY
+
+
+def calc_mean_std(feat, eps=1e-5):
+ """Calculate mean and std for adaptive_instance_normalization.
+
+ Args:
+ feat (Tensor): 4D tensor.
+ eps (float): A small value added to the variance to avoid
+ divide-by-zero. Default: 1e-5.
+ """
+ size = feat.size()
+ assert len(size) == 4, 'The input feature should be 4D tensor.'
+ b, c = size[:2]
+ feat_var = feat.view(b, c, -1).var(dim=2) + eps
+ feat_std = feat_var.sqrt().view(b, c, 1, 1)
+ feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1)
+ return feat_mean, feat_std
+
+
+def adaptive_instance_normalization(content_feat, style_feat):
+ """Adaptive instance normalization.
+
+    Adjust the reference features to have color and illumination similar to
+    those of the degraded features.
+
+ Args:
+ content_feat (Tensor): The reference feature.
+        style_feat (Tensor): The degraded features.
+ """
+ size = content_feat.size()
+ style_mean, style_std = calc_mean_std(style_feat)
+ content_mean, content_std = calc_mean_std(content_feat)
+ normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
+ return normalized_feat * style_std.expand(size) + style_mean.expand(size)
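+
+# Hedged worked example (shapes only, not from the original source): AdaIN moves
+# the per-channel statistics of content_feat onto those of style_feat.
+#   content = torch.randn(1, 256, 16, 16)        # reference feature
+#   style = torch.randn(1, 256, 16, 16) * 2 + 1  # degraded feature with different mean/std
+#   out = adaptive_instance_normalization(content, style)
+#   # per-channel mean/std of `out` now match `style`, spatial structure follows `content`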
+
+
+class PositionEmbeddingSine(nn.Module):
+ """
+ This is a more standard version of the position embedding, very similar to the one
+ used by the Attention is all you need paper, generalized to work on images.
+ """
+
+ def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
+ super().__init__()
+ self.num_pos_feats = num_pos_feats
+ self.temperature = temperature
+ self.normalize = normalize
+ if scale is not None and normalize is False:
+ raise ValueError("normalize should be True if scale is passed")
+ if scale is None:
+ scale = 2 * math.pi
+ self.scale = scale
+
+ def forward(self, x, mask=None):
+ if mask is None:
+ mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
+ not_mask = ~mask
+ y_embed = not_mask.cumsum(1, dtype=torch.float32)
+ x_embed = not_mask.cumsum(2, dtype=torch.float32)
+ if self.normalize:
+ eps = 1e-6
+ y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+ x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+
+ dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+ dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+
+ pos_x = x_embed[:, :, :, None] / dim_t
+ pos_y = y_embed[:, :, :, None] / dim_t
+ pos_x = torch.stack(
+ (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
+ ).flatten(3)
+ pos_y = torch.stack(
+ (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
+ ).flatten(3)
+ pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+ return pos
+
+def _get_activation_fn(activation):
+ """Return an activation function given a string"""
+ if activation == "relu":
+ return F.relu
+ if activation == "gelu":
+ return F.gelu
+ if activation == "glu":
+ return F.glu
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
+
+
+class TransformerSALayer(nn.Module):
+ def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"):
+ super().__init__()
+ self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
+ # Implementation of Feedforward model - MLP
+ self.linear1 = nn.Linear(embed_dim, dim_mlp)
+ self.dropout = nn.Dropout(dropout)
+ self.linear2 = nn.Linear(dim_mlp, embed_dim)
+
+ self.norm1 = nn.LayerNorm(embed_dim)
+ self.norm2 = nn.LayerNorm(embed_dim)
+ self.dropout1 = nn.Dropout(dropout)
+ self.dropout2 = nn.Dropout(dropout)
+
+ self.activation = _get_activation_fn(activation)
+
+ def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+ return tensor if pos is None else tensor + pos
+
+ def forward(self, tgt,
+ tgt_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None):
+
+ # self attention
+ tgt2 = self.norm1(tgt)
+ q = k = self.with_pos_embed(tgt2, query_pos)
+ tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
+ key_padding_mask=tgt_key_padding_mask)[0]
+ tgt = tgt + self.dropout1(tgt2)
+
+ # ffn
+ tgt2 = self.norm2(tgt)
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+ tgt = tgt + self.dropout2(tgt2)
+ return tgt
+
+class Fuse_sft_block(nn.Module):
+ def __init__(self, in_ch, out_ch):
+ super().__init__()
+ self.encode_enc = ResBlock(2*in_ch, out_ch)
+
+ self.scale = nn.Sequential(
+ nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+ nn.LeakyReLU(0.2, True),
+ nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
+
+ self.shift = nn.Sequential(
+ nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+ nn.LeakyReLU(0.2, True),
+ nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
+
+ def forward(self, enc_feat, dec_feat, w=1):
+ enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
+ scale = self.scale(enc_feat)
+ shift = self.shift(enc_feat)
+ residual = w * (dec_feat * scale + shift)
+ out = dec_feat + residual
+ return out
+
+
+@ARCH_REGISTRY.register()
+class CodeFormer(VQAutoEncoder):
+ def __init__(self, dim_embd=512, n_head=8, n_layers=9,
+ codebook_size=1024, latent_size=256,
+ connect_list=['32', '64', '128', '256'],
+ fix_modules=['quantize','generator']):
+ super(CodeFormer, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size)
+
+ if fix_modules is not None:
+ for module in fix_modules:
+ for param in getattr(self, module).parameters():
+ param.requires_grad = False
+
+ self.connect_list = connect_list
+ self.n_layers = n_layers
+ self.dim_embd = dim_embd
+ self.dim_mlp = dim_embd*2
+
+ self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))
+ self.feat_emb = nn.Linear(256, self.dim_embd)
+
+ # transformer
+ self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
+ for _ in range(self.n_layers)])
+
+ # logits_predict head
+ self.idx_pred_layer = nn.Sequential(
+ nn.LayerNorm(dim_embd),
+ nn.Linear(dim_embd, codebook_size, bias=False))
+
+ self.channels = {
+ '16': 512,
+ '32': 256,
+ '64': 256,
+ '128': 128,
+ '256': 128,
+ '512': 64,
+ }
+
+ # after second residual block for > 16, before attn layer for ==16
+ self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18}
+ # after first residual block for > 16, before attn layer for ==16
+ self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21}
+
+ # fuse_convs_dict
+ self.fuse_convs_dict = nn.ModuleDict()
+ for f_size in self.connect_list:
+ in_ch = self.channels[f_size]
+ self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)
+
+ def _init_weights(self, module):
+ if isinstance(module, (nn.Linear, nn.Embedding)):
+ module.weight.data.normal_(mean=0.0, std=0.02)
+ if isinstance(module, nn.Linear) and module.bias is not None:
+ module.bias.data.zero_()
+ elif isinstance(module, nn.LayerNorm):
+ module.bias.data.zero_()
+ module.weight.data.fill_(1.0)
+
+ def forward(self, x, w=0, detach_16=True, code_only=False, adain=False):
+ # ################### Encoder #####################
+ enc_feat_dict = {}
+ out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
+ for i, block in enumerate(self.encoder.blocks):
+ x = block(x)
+ if i in out_list:
+ enc_feat_dict[str(x.shape[-1])] = x.clone()
+
+ lq_feat = x
+ # ################# Transformer ###################
+ # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
+ pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1)
+ # BCHW -> BC(HW) -> (HW)BC
+ feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1))
+ query_emb = feat_emb
+ # Transformer encoder
+ for layer in self.ft_layers:
+ query_emb = layer(query_emb, query_pos=pos_emb)
+
+ # output logits
+ logits = self.idx_pred_layer(query_emb) # (hw)bn
+ logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n
+
+ if code_only: # for training stage II
+ # logits doesn't need softmax before cross_entropy loss
+ return logits, lq_feat
+
+ # ################# Quantization ###################
+ # if self.training:
+ # quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
+ # # b(hw)c -> bc(hw) -> bchw
+ # quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
+ # ------------
+ soft_one_hot = F.softmax(logits, dim=2)
+ _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
+ quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[x.shape[0],16,16,256])
+ # preserve gradients
+ # quant_feat = lq_feat + (quant_feat - lq_feat).detach()
+
+ if detach_16:
+ quant_feat = quant_feat.detach() # for training stage III
+ if adain:
+ quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)
+
+ # ################## Generator ####################
+ x = quant_feat
+ fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
+
+ for i, block in enumerate(self.generator.blocks):
+ x = block(x)
+ if i in fuse_list: # fuse after i-th block
+ f_size = str(x.shape[-1])
+ if w>0:
+ x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
+ out = x
+ # logits doesn't need softmax before cross_entropy loss
+ return out, logits, lq_feat
+ \ No newline at end of file
diff --git a/scripts/r_archs/vqgan_arch.py b/scripts/r_archs/vqgan_arch.py
new file mode 100644
index 0000000..50b3712
--- /dev/null
+++ b/scripts/r_archs/vqgan_arch.py
@@ -0,0 +1,437 @@
+'''
+VQGAN code, adapted from the original created by the Unleashing Transformers authors:
+https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py
+
+'''
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import copy
+from r_basicsr.utils import get_root_logger
+from r_basicsr.utils.registry import ARCH_REGISTRY
+
+
+def normalize(in_channels):
+ return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+
+
+@torch.jit.script
+def swish(x):
+ return x*torch.sigmoid(x)
+
+
+# Define VQVAE classes
+class VectorQuantizer(nn.Module):
+ def __init__(self, codebook_size, emb_dim, beta):
+ super(VectorQuantizer, self).__init__()
+ self.codebook_size = codebook_size # number of embeddings
+ self.emb_dim = emb_dim # dimension of embedding
+ self.beta = beta # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
+ self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
+ self.embedding.weight.data.uniform_(-1.0 / self.codebook_size, 1.0 / self.codebook_size)
+
+ def forward(self, z):
+ # reshape z -> (batch, height, width, channel) and flatten
+ z = z.permute(0, 2, 3, 1).contiguous()
+ z_flattened = z.view(-1, self.emb_dim)
+
+ # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+ d = (z_flattened ** 2).sum(dim=1, keepdim=True) + (self.embedding.weight**2).sum(1) - \
+ 2 * torch.matmul(z_flattened, self.embedding.weight.t())
+
+ mean_distance = torch.mean(d)
+ # find closest encodings
+ # min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1)
+ min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False)
+ # [0-1], higher score, higher confidence
+ min_encoding_scores = torch.exp(-min_encoding_scores/10)
+
+ min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z)
+ min_encodings.scatter_(1, min_encoding_indices, 1)
+
+ # get quantized latent vectors
+ z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape)
+ # compute loss for embedding
+ loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2)
+ # preserve gradients
+ z_q = z + (z_q - z).detach()
+
+ # perplexity
+ e_mean = torch.mean(min_encodings, dim=0)
+ perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
+ # reshape back to match original input shape
+ z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+ return z_q, loss, {
+ "perplexity": perplexity,
+ "min_encodings": min_encodings,
+ "min_encoding_indices": min_encoding_indices,
+ "min_encoding_scores": min_encoding_scores,
+ "mean_distance": mean_distance
+ }
+
+ def get_codebook_feat(self, indices, shape):
+ # input indices: batch*token_num -> (batch*token_num)*1
+ # shape: batch, height, width, channel
+ indices = indices.view(-1,1)
+ min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices)
+ min_encodings.scatter_(1, indices, 1)
+ # get quantized latent vectors
+ z_q = torch.matmul(min_encodings.float(), self.embedding.weight)
+
+ if shape is not None: # reshape back to match original input shape
+ z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous()
+
+ return z_q
+
+
+class GumbelQuantizer(nn.Module):
+ def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0):
+ super().__init__()
+ self.codebook_size = codebook_size # number of embeddings
+ self.emb_dim = emb_dim # dimension of embedding
+ self.straight_through = straight_through
+ self.temperature = temp_init
+ self.kl_weight = kl_weight
+ self.proj = nn.Conv2d(num_hiddens, codebook_size, 1) # projects last encoder layer to quantized logits
+ self.embed = nn.Embedding(codebook_size, emb_dim)
+
+ def forward(self, z):
+ hard = self.straight_through if self.training else True
+
+ logits = self.proj(z)
+
+ soft_one_hot = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard)
+
+ z_q = torch.einsum("b n h w, n d -> b d h w", soft_one_hot, self.embed.weight)
+
+ # + kl divergence to the prior loss
+ qy = F.softmax(logits, dim=1)
+ diff = self.kl_weight * torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1).mean()
+ min_encoding_indices = soft_one_hot.argmax(dim=1)
+
+ return z_q, diff, {
+ "min_encoding_indices": min_encoding_indices
+ }
+
+
+class Downsample(nn.Module):
+ def __init__(self, in_channels):
+ super().__init__()
+ self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+
+ def forward(self, x):
+ pad = (0, 1, 0, 1)
+ x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
+ x = self.conv(x)
+ return x
+
+
+class Upsample(nn.Module):
+ def __init__(self, in_channels):
+ super().__init__()
+ self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+
+ def forward(self, x):
+ x = F.interpolate(x, scale_factor=2.0, mode="nearest")
+ x = self.conv(x)
+
+ return x
+
+
+class ResBlock(nn.Module):
+ def __init__(self, in_channels, out_channels=None):
+ super(ResBlock, self).__init__()
+ self.in_channels = in_channels
+ self.out_channels = in_channels if out_channels is None else out_channels
+ self.norm1 = normalize(in_channels)
+        self.conv1 = nn.Conv2d(in_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        self.norm2 = normalize(self.out_channels)
+        self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        if self.in_channels != self.out_channels:
+            self.conv_out = nn.Conv2d(in_channels, self.out_channels, kernel_size=1, stride=1, padding=0)
+
+ def forward(self, x_in):
+ x = x_in
+ x = self.norm1(x)
+ x = swish(x)
+ x = self.conv1(x)
+ x = self.norm2(x)
+ x = swish(x)
+ x = self.conv2(x)
+ if self.in_channels != self.out_channels:
+ x_in = self.conv_out(x_in)
+
+ return x + x_in
+
+
+class AttnBlock(nn.Module):
+ def __init__(self, in_channels):
+ super().__init__()
+ self.in_channels = in_channels
+
+ self.norm = normalize(in_channels)
+ self.q = torch.nn.Conv2d(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0
+ )
+ self.k = torch.nn.Conv2d(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0
+ )
+ self.v = torch.nn.Conv2d(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0
+ )
+ self.proj_out = torch.nn.Conv2d(
+ in_channels,
+ in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0
+ )
+
+ def forward(self, x):
+ h_ = x
+ h_ = self.norm(h_)
+ q = self.q(h_)
+ k = self.k(h_)
+ v = self.v(h_)
+
+ # compute attention
+ b, c, h, w = q.shape
+ q = q.reshape(b, c, h*w)
+ q = q.permute(0, 2, 1)
+ k = k.reshape(b, c, h*w)
+ w_ = torch.bmm(q, k)
+ w_ = w_ * (int(c)**(-0.5))
+ w_ = F.softmax(w_, dim=2)
+
+ # attend to values
+ v = v.reshape(b, c, h*w)
+ w_ = w_.permute(0, 2, 1)
+ h_ = torch.bmm(v, w_)
+ h_ = h_.reshape(b, c, h, w)
+
+ h_ = self.proj_out(h_)
+
+ return x+h_
+
+
+class Encoder(nn.Module):
+ def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions):
+ super().__init__()
+ self.nf = nf
+ self.num_resolutions = len(ch_mult)
+ self.num_res_blocks = num_res_blocks
+ self.resolution = resolution
+ self.attn_resolutions = attn_resolutions
+
+ curr_res = self.resolution
+ in_ch_mult = (1,)+tuple(ch_mult)
+
+ blocks = []
+        # initial convolution
+ blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1))
+
+ # residual and downsampling blocks, with attention on smaller res (16x16)
+ for i in range(self.num_resolutions):
+ block_in_ch = nf * in_ch_mult[i]
+ block_out_ch = nf * ch_mult[i]
+ for _ in range(self.num_res_blocks):
+ blocks.append(ResBlock(block_in_ch, block_out_ch))
+ block_in_ch = block_out_ch
+ if curr_res in attn_resolutions:
+ blocks.append(AttnBlock(block_in_ch))
+
+ if i != self.num_resolutions - 1:
+ blocks.append(Downsample(block_in_ch))
+ curr_res = curr_res // 2
+
+ # non-local attention block
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
+ blocks.append(AttnBlock(block_in_ch))
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
+
+ # normalise and convert to latent size
+ blocks.append(normalize(block_in_ch))
+ blocks.append(nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1))
+ self.blocks = nn.ModuleList(blocks)
+
+ def forward(self, x):
+ for block in self.blocks:
+ x = block(x)
+
+ return x
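+
+# Shape sketch (illustrative values, not the shipped configuration): Encoder(in_channels=3, nf=64, emb_dim=256,
+# ch_mult=[1, 2, 4], num_res_blocks=2, resolution=256, attn_resolutions=[16]) maps a (B, 3, 256, 256) input to a
+# (B, 256, 64, 64) latent: the spatial size is halved once per resolution level except the last, and the final
+# conv projects to emb_dim channels.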
+
+
+class Generator(nn.Module):
+ def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
+ super().__init__()
+ self.nf = nf
+ self.ch_mult = ch_mult
+ self.num_resolutions = len(self.ch_mult)
+ self.num_res_blocks = res_blocks
+ self.resolution = img_size
+ self.attn_resolutions = attn_resolutions
+ self.in_channels = emb_dim
+ self.out_channels = 3
+ block_in_ch = self.nf * self.ch_mult[-1]
+ curr_res = self.resolution // 2 ** (self.num_resolutions-1)
+
+ blocks = []
+ # initial conv
+ blocks.append(nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1))
+
+ # non-local attention block
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
+ blocks.append(AttnBlock(block_in_ch))
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
+
+ for i in reversed(range(self.num_resolutions)):
+ block_out_ch = self.nf * self.ch_mult[i]
+
+ for _ in range(self.num_res_blocks):
+ blocks.append(ResBlock(block_in_ch, block_out_ch))
+ block_in_ch = block_out_ch
+
+ if curr_res in self.attn_resolutions:
+ blocks.append(AttnBlock(block_in_ch))
+
+ if i != 0:
+ blocks.append(Upsample(block_in_ch))
+ curr_res = curr_res * 2
+
+ blocks.append(normalize(block_in_ch))
+ blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1))
+
+ self.blocks = nn.ModuleList(blocks)
+
+
+ def forward(self, x):
+ for block in self.blocks:
+ x = block(x)
+
+ return x
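+
+# The Generator mirrors the Encoder: with the same illustrative values (nf=64, emb_dim=256, ch_mult=[1, 2, 4],
+# res_blocks=2, img_size=256, attn_resolutions=[16]) it starts from a (B, 256, 64, 64) latent
+# (curr_res = img_size // 2**(len(ch_mult) - 1)) and upsamples it back to a (B, 3, 256, 256) image.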
+
+
+@ARCH_REGISTRY.register()
+class VQAutoEncoder(nn.Module):
+ def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
+ beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
+ super().__init__()
+ logger = get_root_logger()
+ self.in_channels = 3
+ self.nf = nf
+ self.n_blocks = res_blocks
+ self.codebook_size = codebook_size
+ self.embed_dim = emb_dim
+ self.ch_mult = ch_mult
+ self.resolution = img_size
+ self.attn_resolutions = attn_resolutions
+ self.quantizer_type = quantizer
+ self.encoder = Encoder(
+ self.in_channels,
+ self.nf,
+ self.embed_dim,
+ self.ch_mult,
+ self.n_blocks,
+ self.resolution,
+ self.attn_resolutions
+ )
+ if self.quantizer_type == "nearest":
+ self.beta = beta #0.25
+ self.quantize = VectorQuantizer(self.codebook_size, self.embed_dim, self.beta)
+ elif self.quantizer_type == "gumbel":
+ self.gumbel_num_hiddens = emb_dim
+ self.straight_through = gumbel_straight_through
+ self.kl_weight = gumbel_kl_weight
+ self.quantize = GumbelQuantizer(
+ self.codebook_size,
+ self.embed_dim,
+ self.gumbel_num_hiddens,
+ self.straight_through,
+ self.kl_weight
+ )
+ self.generator = Generator(
+ self.nf,
+ self.embed_dim,
+ self.ch_mult,
+ self.n_blocks,
+ self.resolution,
+ self.attn_resolutions
+ )
+
+ if model_path is not None:
+ chkpt = torch.load(model_path, map_location='cpu')
+ if 'params_ema' in chkpt:
+ self.load_state_dict(chkpt['params_ema'])
+ logger.info(f'vqgan is loaded from: {model_path} [params_ema]')
+ elif 'params' in chkpt:
+ self.load_state_dict(chkpt['params'])
+ logger.info(f'vqgan is loaded from: {model_path} [params]')
+ else:
+ raise ValueError("Unexpected checkpoint format: expected a 'params_ema' or 'params' key")
+
+
+ def forward(self, x):
+ x = self.encoder(x)
+ quant, codebook_loss, quant_stats = self.quantize(x)
+ x = self.generator(quant)
+ return x, codebook_loss, quant_stats
+
+
+
+# patch based discriminator
+@ARCH_REGISTRY.register()
+class VQGANDiscriminator(nn.Module):
+ def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None):
+ super().__init__()
+
+ layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
+ ndf_mult = 1
+ ndf_mult_prev = 1
+ for n in range(1, n_layers): # gradually increase the number of filters
+ ndf_mult_prev = ndf_mult
+ ndf_mult = min(2 ** n, 8)
+ layers += [
+ nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False),
+ nn.BatchNorm2d(ndf * ndf_mult),
+ nn.LeakyReLU(0.2, True)
+ ]
+
+ ndf_mult_prev = ndf_mult
+ ndf_mult = min(2 ** n_layers, 8)
+
+ layers += [
+ nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False),
+ nn.BatchNorm2d(ndf * ndf_mult),
+ nn.LeakyReLU(0.2, True)
+ ]
+
+ layers += [
+ nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)] # output 1 channel prediction map
+ self.main = nn.Sequential(*layers)
+
+ if model_path is not None:
+ chkpt = torch.load(model_path, map_location='cpu')
+ if 'params_d' in chkpt:
+ self.load_state_dict(chkpt['params_d'])
+ elif 'params' in chkpt:
+ self.load_state_dict(chkpt['params'])
+ else:
+ raise ValueError("Unexpected checkpoint format: expected a 'params_d' or 'params' key")
+
+ def forward(self, x):
+ return self.main(x)
+ \ No newline at end of file
diff --git a/scripts/r_faceboost/__init__.py b/scripts/r_faceboost/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/r_faceboost/__init__.py
diff --git a/scripts/r_faceboost/restorer.py b/scripts/r_faceboost/restorer.py
new file mode 100644
index 0000000..60c0165
--- /dev/null
+++ b/scripts/r_faceboost/restorer.py
@@ -0,0 +1,130 @@
+import sys
+import cv2
+import numpy as np
+import torch
+from torchvision.transforms.functional import normalize
+
+try:
+ import torch.cuda as cuda
+except Exception:
+ cuda = None
+
+import comfy.utils
+import folder_paths
+import comfy.model_management as model_management
+
+from scripts.reactor_logger import logger
+from r_basicsr.utils.registry import ARCH_REGISTRY
+from r_chainner import model_loading
+from reactor_utils import (
+ tensor2img,
+ img2tensor,
+ set_ort_session,
+ prepare_cropped_face,
+ normalize_cropped_face
+)
+
+
+if cuda is not None:
+ if cuda.is_available():
+ providers = ["CUDAExecutionProvider"]
+ else:
+ providers = ["CPUExecutionProvider"]
+else:
+ providers = ["CPUExecutionProvider"]
+
+
+def get_restored_face(cropped_face,
+ face_restore_model,
+ face_restore_visibility,
+ codeformer_weight,
+ interpolation: str = "Bicubic"):
+
+ if interpolation == "Bicubic":
+ interpolate = cv2.INTER_CUBIC
+ elif interpolation == "Bilinear":
+ interpolate = cv2.INTER_LINEAR
+ elif interpolation == "Nearest":
+ interpolate = cv2.INTER_NEAREST
+ elif interpolation == "Lanczos":
+ interpolate = cv2.INTER_LANCZOS4
+
+ face_size = 512
+ if "1024" in face_restore_model.lower():
+ face_size = 1024
+ elif "2048" in face_restore_model.lower():
+ face_size = 2048
+
+ scale = face_size / cropped_face.shape[0]
+
+ logger.status(f"Boosting the Face with {face_restore_model} | Face Size is set to {face_size} with Scale Factor = {scale} and '{interpolation}' interpolation")
+
+ cropped_face = cv2.resize(cropped_face, (face_size, face_size), interpolation=interpolate)
+
+ # For upscaling the base 128px face, I found bicubic interpolation to be the best compromise between antialiasing
+ # and detail preservation. Nearest is predictably unusable, bilinear produces too much aliasing, and Lanczos
+ # produces too many hallucinations and artifacts/fringing.
+
+ model_path = folder_paths.get_full_path("facerestore_models", face_restore_model)
+ device = model_management.get_torch_device()
+
+ cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
+ normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
+ cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
+
+ try:
+
+ with torch.no_grad():
+
+ if ".onnx" in face_restore_model: # ONNX models
+
+ ort_session = set_ort_session(model_path, providers=providers)
+ ort_session_inputs = {}
+ facerestore_model = ort_session
+
+ for ort_session_input in ort_session.get_inputs():
+ if ort_session_input.name == "input":
+ cropped_face_prep = prepare_cropped_face(cropped_face)
+ ort_session_inputs[ort_session_input.name] = cropped_face_prep
+ if ort_session_input.name == "weight":
+ weight = np.array([1], dtype=np.double)
+ ort_session_inputs[ort_session_input.name] = weight
+
+ output = ort_session.run(None, ort_session_inputs)[0][0]
+ restored_face = normalize_cropped_face(output)
+
+ else: # PTH models
+
+ if "codeformer" in face_restore_model.lower():
+ codeformer_net = ARCH_REGISTRY.get("CodeFormer")(
+ dim_embd=512,
+ codebook_size=1024,
+ n_head=8,
+ n_layers=9,
+ connect_list=["32", "64", "128", "256"],
+ ).to(device)
+ checkpoint = torch.load(model_path)["params_ema"]
+ codeformer_net.load_state_dict(checkpoint)
+ facerestore_model = codeformer_net.eval()
+ else:
+ sd = comfy.utils.load_torch_file(model_path, safe_load=True)
+ facerestore_model = model_loading.load_state_dict(sd).eval()
+ facerestore_model.to(device)
+
+ output = facerestore_model(cropped_face_t, w=codeformer_weight)[
+ 0] if "codeformer" in face_restore_model.lower() else facerestore_model(cropped_face_t)[0]
+ restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
+
+ del output
+ torch.cuda.empty_cache()
+
+ except Exception as error:
+
+ print(f"\tFailed inference: {error}", file=sys.stderr)
+ restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
+
+ if face_restore_visibility < 1:
+ restored_face = cropped_face * (1 - face_restore_visibility) + restored_face * face_restore_visibility
+
+ restored_face = restored_face.astype("uint8")
+ return restored_face, scale
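+
+# Illustrative usage (the checkpoint filename is hypothetical; any model registered under "facerestore_models" works):
+# restored, scale = get_restored_face(cropped_face_bgr, "GFPGANv1.4.pth", 1.0, 0.5, "Bicubic")
+# `cropped_face_bgr` is an HxWx3 uint8 BGR crop from the swapper; the function returns the restored face at the
+# model's face size (512/1024/2048) as uint8, together with the scale factor applied to reach that size.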
diff --git a/scripts/r_faceboost/swapper.py b/scripts/r_faceboost/swapper.py
new file mode 100644
index 0000000..e0467cf
--- /dev/null
+++ b/scripts/r_faceboost/swapper.py
@@ -0,0 +1,42 @@
+import cv2
+import numpy as np
+
+# The following code is almost entirely copied from INSwapper; the only change here is that we use bicubic
+# interpolation for the warpAffine call. Now that the face has been restored, bicubic represents a good compromise
+# whether the restored face needs to be upscaled or downscaled.
+def in_swap(img, bgr_fake, M):
+ target_img = img
+ IM = cv2.invertAffineTransform(M)
+ img_white = np.full((bgr_fake.shape[0], bgr_fake.shape[1]), 255, dtype=np.float32)
+
+ # Note the use of bicubic here; this is functionally the only change from the source code
+ bgr_fake = cv2.warpAffine(bgr_fake, IM, (target_img.shape[1], target_img.shape[0]), borderValue=0.0, flags=cv2.INTER_CUBIC)
+
+ img_white = cv2.warpAffine(img_white, IM, (target_img.shape[1], target_img.shape[0]), borderValue=0.0)
+ img_white[img_white > 20] = 255
+ img_mask = img_white
+ mask_h_inds, mask_w_inds = np.where(img_mask == 255)
+ mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
+ mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
+ mask_size = int(np.sqrt(mask_h * mask_w))
+ k = max(mask_size // 10, 10)
+ # k = max(mask_size//20, 6)
+ # k = 6
+ kernel = np.ones((k, k), np.uint8)
+ img_mask = cv2.erode(img_mask, kernel, iterations=1)
+ kernel = np.ones((2, 2), np.uint8)
+ k = max(mask_size // 20, 5)
+ # k = 3
+ # k = 3
+ kernel_size = (k, k)
+ blur_size = tuple(2 * i + 1 for i in kernel_size)
+ img_mask = cv2.GaussianBlur(img_mask, blur_size, 0)
+ k = 5
+ kernel_size = (k, k)
+ blur_size = tuple(2 * i + 1 for i in kernel_size)
+ img_mask /= 255
+ # img_mask = fake_diff
+ img_mask = np.reshape(img_mask, [img_mask.shape[0], img_mask.shape[1], 1])
+ fake_merged = img_mask * bgr_fake + (1 - img_mask) * target_img.astype(np.float32)
+ fake_merged = fake_merged.astype(np.uint8)
+ return fake_merged
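+
+# Illustrative usage (variable names are hypothetical): given the 2x3 affine matrix M returned by
+# face_swapper.get(..., paste_back=False), in_swap(frame_bgr, swapped_face_bgr, M) warps the swapped face back into
+# the full frame with the inverse transform, then feathers it in with an eroded, Gaussian-blurred rectangle mask.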
diff --git a/scripts/r_masking/__init__.py b/scripts/r_masking/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/r_masking/__init__.py
diff --git a/scripts/r_masking/core.py b/scripts/r_masking/core.py
new file mode 100644
index 0000000..36862e1
--- /dev/null
+++ b/scripts/r_masking/core.py
@@ -0,0 +1,647 @@
+import numpy as np
+import cv2
+import torch
+import torchvision.transforms.functional as TF
+
+import sys as _sys
+from keyword import iskeyword as _iskeyword
+from operator import itemgetter as _itemgetter
+
+from segment_anything import SamPredictor
+
+from comfy import model_management
+
+
+################################################################################
+### namedtuple
+################################################################################
+
+try:
+ from _collections import _tuplegetter
+except ImportError:
+ _tuplegetter = lambda index, doc: property(_itemgetter(index), doc=doc)
+
+def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None):
+ """Returns a new subclass of tuple with named fields.
+
+ >>> Point = namedtuple('Point', ['x', 'y'])
+ >>> Point.__doc__ # docstring for the new class
+ 'Point(x, y)'
+ >>> p = Point(11, y=22) # instantiate with positional args or keywords
+ >>> p[0] + p[1] # indexable like a plain tuple
+ 33
+ >>> x, y = p # unpack like a regular tuple
+ >>> x, y
+ (11, 22)
+ >>> p.x + p.y # fields also accessible by name
+ 33
+ >>> d = p._asdict() # convert to a dictionary
+ >>> d['x']
+ 11
+ >>> Point(**d) # convert from a dictionary
+ Point(x=11, y=22)
+ >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
+ Point(x=100, y=22)
+
+ """
+
+ # Validate the field names. At the user's option, either generate an error
+ # message or automatically replace the field name with a valid name.
+ if isinstance(field_names, str):
+ field_names = field_names.replace(',', ' ').split()
+ field_names = list(map(str, field_names))
+ typename = _sys.intern(str(typename))
+
+ if rename:
+ seen = set()
+ for index, name in enumerate(field_names):
+ if (not name.isidentifier()
+ or _iskeyword(name)
+ or name.startswith('_')
+ or name in seen):
+ field_names[index] = f'_{index}'
+ seen.add(name)
+
+ for name in [typename] + field_names:
+ if type(name) is not str:
+ raise TypeError('Type names and field names must be strings')
+ if not name.isidentifier():
+ raise ValueError('Type names and field names must be valid '
+ f'identifiers: {name!r}')
+ if _iskeyword(name):
+ raise ValueError('Type names and field names cannot be a '
+ f'keyword: {name!r}')
+
+ seen = set()
+ for name in field_names:
+ if name.startswith('_') and not rename:
+ raise ValueError('Field names cannot start with an underscore: '
+ f'{name!r}')
+ if name in seen:
+ raise ValueError(f'Encountered duplicate field name: {name!r}')
+ seen.add(name)
+
+ field_defaults = {}
+ if defaults is not None:
+ defaults = tuple(defaults)
+ if len(defaults) > len(field_names):
+ raise TypeError('Got more default values than field names')
+ field_defaults = dict(reversed(list(zip(reversed(field_names),
+ reversed(defaults)))))
+
+ # Variables used in the methods and docstrings
+ field_names = tuple(map(_sys.intern, field_names))
+ num_fields = len(field_names)
+ arg_list = ', '.join(field_names)
+ if num_fields == 1:
+ arg_list += ','
+ repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')'
+ tuple_new = tuple.__new__
+ _dict, _tuple, _len, _map, _zip = dict, tuple, len, map, zip
+
+ # Create all the named tuple methods to be added to the class namespace
+
+ namespace = {
+ '_tuple_new': tuple_new,
+ '__builtins__': {},
+ '__name__': f'namedtuple_{typename}',
+ }
+ code = f'lambda _cls, {arg_list}: _tuple_new(_cls, ({arg_list}))'
+ __new__ = eval(code, namespace)
+ __new__.__name__ = '__new__'
+ __new__.__doc__ = f'Create new instance of {typename}({arg_list})'
+ if defaults is not None:
+ __new__.__defaults__ = defaults
+
+ @classmethod
+ def _make(cls, iterable):
+ result = tuple_new(cls, iterable)
+ if _len(result) != num_fields:
+ raise TypeError(f'Expected {num_fields} arguments, got {len(result)}')
+ return result
+
+ _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence '
+ 'or iterable')
+
+ def _replace(self, /, **kwds):
+ result = self._make(_map(kwds.pop, field_names, self))
+ if kwds:
+ raise ValueError(f'Got unexpected field names: {list(kwds)!r}')
+ return result
+
+ _replace.__doc__ = (f'Return a new {typename} object replacing specified '
+ 'fields with new values')
+
+ def __repr__(self):
+ 'Return a nicely formatted representation string'
+ return self.__class__.__name__ + repr_fmt % self
+
+ def _asdict(self):
+ 'Return a new dict which maps field names to their values.'
+ return _dict(_zip(self._fields, self))
+
+ def __getnewargs__(self):
+ 'Return self as a plain tuple. Used by copy and pickle.'
+ return _tuple(self)
+
+ # Modify function metadata to help with introspection and debugging
+ for method in (
+ __new__,
+ _make.__func__,
+ _replace,
+ __repr__,
+ _asdict,
+ __getnewargs__,
+ ):
+ method.__qualname__ = f'{typename}.{method.__name__}'
+
+ # Build-up the class namespace dictionary
+ # and use type() to build the result class
+ class_namespace = {
+ '__doc__': f'{typename}({arg_list})',
+ '__slots__': (),
+ '_fields': field_names,
+ '_field_defaults': field_defaults,
+ '__new__': __new__,
+ '_make': _make,
+ '_replace': _replace,
+ '__repr__': __repr__,
+ '_asdict': _asdict,
+ '__getnewargs__': __getnewargs__,
+ '__match_args__': field_names,
+ }
+ for index, name in enumerate(field_names):
+ doc = _sys.intern(f'Alias for field number {index}')
+ class_namespace[name] = _tuplegetter(index, doc)
+
+ result = type(typename, (tuple,), class_namespace)
+
+ # For pickling to work, the __module__ variable needs to be set to the frame
+ # where the named tuple is created. Bypass this step in environments where
+ # sys._getframe is not defined (Jython for example) or sys._getframe is not
+ # defined for arguments greater than 0 (IronPython), or where the user has
+ # specified a particular module.
+ if module is None:
+ try:
+ module = _sys._getframe(1).f_globals.get('__name__', '__main__')
+ except (AttributeError, ValueError):
+ pass
+ if module is not None:
+ result.__module__ = module
+
+ return result
+
+
+SEG = namedtuple("SEG",
+ ['cropped_image', 'cropped_mask', 'confidence', 'crop_region', 'bbox', 'label', 'control_net_wrapper'],
+ defaults=[None])
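+
+# Field meanings (as populated by the detectors in scripts/r_masking/subcore.py): cropped_image is the image crop,
+# cropped_mask the matching 2-D mask, confidence the detector score, crop_region and bbox are [x1, y1, x2, y2] boxes
+# (crop_region is the enlarged crop, bbox the tight detection), label the class name, and control_net_wrapper an
+# optional attachment that defaults to None.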
+
+def crop_ndarray4(npimg, crop_region):
+ x1 = crop_region[0]
+ y1 = crop_region[1]
+ x2 = crop_region[2]
+ y2 = crop_region[3]
+
+ cropped = npimg[:, y1:y2, x1:x2, :]
+
+ return cropped
+
+crop_tensor4 = crop_ndarray4
+
+def crop_ndarray2(npimg, crop_region):
+ x1 = crop_region[0]
+ y1 = crop_region[1]
+ x2 = crop_region[2]
+ y2 = crop_region[3]
+
+ cropped = npimg[y1:y2, x1:x2]
+
+ return cropped
+
+def crop_image(image, crop_region):
+ return crop_tensor4(image, crop_region)
+
+def normalize_region(limit, startp, size):
+ if startp < 0:
+ new_endp = min(limit, size)
+ new_startp = 0
+ elif startp + size > limit:
+ new_startp = max(0, limit - size)
+ new_endp = limit
+ else:
+ new_startp = startp
+ new_endp = min(limit, startp+size)
+
+ return int(new_startp), int(new_endp)
+
+def make_crop_region(w, h, bbox, crop_factor, crop_min_size=None):
+ x1 = bbox[0]
+ y1 = bbox[1]
+ x2 = bbox[2]
+ y2 = bbox[3]
+
+ bbox_w = x2 - x1
+ bbox_h = y2 - y1
+
+ crop_w = bbox_w * crop_factor
+ crop_h = bbox_h * crop_factor
+
+ if crop_min_size is not None:
+ crop_w = max(crop_min_size, crop_w)
+ crop_h = max(crop_min_size, crop_h)
+
+ kernel_x = x1 + bbox_w / 2
+ kernel_y = y1 + bbox_h / 2
+
+ new_x1 = int(kernel_x - crop_w / 2)
+ new_y1 = int(kernel_y - crop_h / 2)
+
+ # make sure the crop region stays inside the (w, h) bounds
+ new_x1, new_x2 = normalize_region(w, new_x1, crop_w)
+ new_y1, new_y2 = normalize_region(h, new_y1, crop_h)
+
+ return [new_x1, new_y1, new_x2, new_y2]
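+
+# Worked example: on a 512x512 image, bbox=[100, 100, 200, 180] with crop_factor=2 gives a 200x160 crop centred on
+# the bbox centre (150, 140), i.e. [50, 60, 250, 220]; normalize_region only shifts/clips the window when it would
+# otherwise fall outside the image.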
+
+def create_segmasks(results):
+ bboxs = results[1]
+ segms = results[2]
+ confidence = results[3]
+
+ results = []
+ for i in range(len(segms)):
+ item = (bboxs[i], segms[i].astype(np.float32), confidence[i])
+ results.append(item)
+ return results
+
+def dilate_masks(segmasks, dilation_factor, iter=1):
+ if dilation_factor == 0:
+ return segmasks
+
+ dilated_masks = []
+ kernel = np.ones((abs(dilation_factor), abs(dilation_factor)), np.uint8)
+
+ kernel = cv2.UMat(kernel)
+
+ for i in range(len(segmasks)):
+ cv2_mask = segmasks[i][1]
+
+ cv2_mask = cv2.UMat(cv2_mask)
+
+ if dilation_factor > 0:
+ dilated_mask = cv2.dilate(cv2_mask, kernel, iterations=iter)
+ else:
+ dilated_mask = cv2.erode(cv2_mask, kernel, iterations=iter)
+
+ dilated_mask = dilated_mask.get()
+
+ item = (segmasks[i][0], dilated_mask, segmasks[i][2])
+ dilated_masks.append(item)
+
+ return dilated_masks
+
+def is_same_device(a, b):
+ a_device = torch.device(a) if isinstance(a, str) else a
+ b_device = torch.device(b) if isinstance(b, str) else b
+ return a_device.type == b_device.type and a_device.index == b_device.index
+
+class SafeToGPU:
+ def __init__(self, size):
+ self.size = size
+
+ def to_device(self, obj, device):
+ if is_same_device(device, 'cpu'):
+ obj.to(device)
+ else:
+ if is_same_device(obj.device, 'cpu'): # cpu to gpu
+ model_management.free_memory(self.size * 1.3, device)
+ if model_management.get_free_memory(device) > self.size * 1.3:
+ try:
+ obj.to(device)
+ except Exception:
+ print(f"WARN: The model was not moved to '{device}' due to insufficient memory. [1]")
+ else:
+ print(f"WARN: The model was not moved to '{device}' due to insufficient memory. [2]")
+
+def center_of_bbox(bbox):
+ w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
+ return bbox[0] + w/2, bbox[1] + h/2
+
+def sam_predict(predictor, points, plabs, bbox, threshold):
+ point_coords = None if not points else np.array(points)
+ point_labels = None if not plabs else np.array(plabs)
+
+ box = np.array([bbox]) if bbox is not None else None
+
+ cur_masks, scores, _ = predictor.predict(point_coords=point_coords, point_labels=point_labels, box=box)
+
+ total_masks = []
+
+ selected = False
+ max_score = 0
+ max_mask = None
+ for idx in range(len(scores)):
+ if scores[idx] > max_score:
+ max_score = scores[idx]
+ max_mask = cur_masks[idx]
+
+ if scores[idx] >= threshold:
+ selected = True
+ total_masks.append(cur_masks[idx])
+ else:
+ pass
+
+ if not selected and max_mask is not None:
+ total_masks.append(max_mask)
+
+ return total_masks
+
+def make_2d_mask(mask):
+ if len(mask.shape) == 4:
+ return mask.squeeze(0).squeeze(0)
+
+ elif len(mask.shape) == 3:
+ return mask.squeeze(0)
+
+ return mask
+
+def gen_detection_hints_from_mask_area(x, y, mask, threshold, use_negative):
+ mask = make_2d_mask(mask)
+
+ points = []
+ plabs = []
+
+ # minimum sampling step >= 3
+ y_step = max(3, int(mask.shape[0] / 20))
+ x_step = max(3, int(mask.shape[1] / 20))
+
+ for i in range(0, len(mask), y_step):
+ for j in range(0, len(mask[i]), x_step):
+ if mask[i][j] > threshold:
+ points.append((x + j, y + i))
+ plabs.append(1)
+ elif use_negative and mask[i][j] == 0:
+ points.append((x + j, y + i))
+ plabs.append(0)
+
+ return points, plabs
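+
+# Example of the sampling density: for a 256x384 cropped mask the grid step is max(3, 256 // 20) = 12 rows and
+# max(3, 384 // 20) = 19 columns; every sampled pixel above `threshold` becomes a positive SAM point hint (label 1)
+# and, when use_negative is set, exact-zero pixels become negative hints (label 0), all offset by the crop origin (x, y).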
+
+def gen_negative_hints(w, h, x1, y1, x2, y2):
+ npoints = []
+ nplabs = []
+
+ # minimum sampling step >= 3
+ y_step = max(3, int(h / 20))
+ x_step = max(3, int(w / 20))
+
+ for i in range(10, h - 10, y_step):
+ for j in range(10, w - 10, x_step):
+ if not (x1 - 10 <= j and j <= x2 + 10 and y1 - 10 <= i and i <= y2 + 10):
+ npoints.append((j, i))
+ nplabs.append(0)
+
+ return npoints, nplabs
+
+def generate_detection_hints(image, seg, center, detection_hint, dilated_bbox, mask_hint_threshold, use_small_negative,
+ mask_hint_use_negative):
+ [x1, y1, x2, y2] = dilated_bbox
+
+ points = []
+ plabs = []
+ if detection_hint == "center-1":
+ points.append(center)
+ plabs = [1] # 1 = foreground point, 0 = background point
+
+ elif detection_hint == "horizontal-2":
+ gap = (x2 - x1) / 3
+ points.append((x1 + gap, center[1]))
+ points.append((x1 + gap * 2, center[1]))
+ plabs = [1, 1]
+
+ elif detection_hint == "vertical-2":
+ gap = (y2 - y1) / 3
+ points.append((center[0], y1 + gap))
+ points.append((center[0], y1 + gap * 2))
+ plabs = [1, 1]
+
+ elif detection_hint == "rect-4":
+ x_gap = (x2 - x1) / 3
+ y_gap = (y2 - y1) / 3
+ points.append((x1 + x_gap, center[1]))
+ points.append((x1 + x_gap * 2, center[1]))
+ points.append((center[0], y1 + y_gap))
+ points.append((center[0], y1 + y_gap * 2))
+ plabs = [1, 1, 1, 1]
+
+ elif detection_hint == "diamond-4":
+ x_gap = (x2 - x1) / 3
+ y_gap = (y2 - y1) / 3
+ points.append((x1 + x_gap, y1 + y_gap))
+ points.append((x1 + x_gap * 2, y1 + y_gap))
+ points.append((x1 + x_gap, y1 + y_gap * 2))
+ points.append((x1 + x_gap * 2, y1 + y_gap * 2))
+ plabs = [1, 1, 1, 1]
+
+ elif detection_hint == "mask-point-bbox":
+ center = center_of_bbox(seg.bbox)
+ points.append(center)
+ plabs = [1]
+
+ elif detection_hint == "mask-area":
+ points, plabs = gen_detection_hints_from_mask_area(seg.crop_region[0], seg.crop_region[1],
+ seg.cropped_mask,
+ mask_hint_threshold, use_small_negative)
+
+ if mask_hint_use_negative == "Outter":
+ npoints, nplabs = gen_negative_hints(image.shape[1], image.shape[0],
+ seg.crop_region[0], seg.crop_region[1],
+ seg.crop_region[2], seg.crop_region[3])
+
+ points += npoints
+ plabs += nplabs
+
+ return points, plabs
+
+def combine_masks2(masks):
+ if len(masks) == 0:
+ return None
+ else:
+ initial_cv2_mask = np.array(masks[0]).astype(np.uint8)
+ combined_cv2_mask = initial_cv2_mask
+
+ for i in range(1, len(masks)):
+ cv2_mask = np.array(masks[i]).astype(np.uint8)
+
+ if combined_cv2_mask.shape == cv2_mask.shape:
+ combined_cv2_mask = cv2.bitwise_or(combined_cv2_mask, cv2_mask)
+ else:
+ # do nothing - incompatible mask
+ pass
+
+ mask = torch.from_numpy(combined_cv2_mask)
+ return mask
+
+def dilate_mask(mask, dilation_factor, iter=1):
+ if dilation_factor == 0:
+ return make_2d_mask(mask)
+
+ mask = make_2d_mask(mask)
+
+ kernel = np.ones((abs(dilation_factor), abs(dilation_factor)), np.uint8)
+
+ mask = cv2.UMat(mask)
+ kernel = cv2.UMat(kernel)
+
+ if dilation_factor > 0:
+ result = cv2.dilate(mask, kernel, iterations=iter)
+ else:
+ result = cv2.erode(mask, kernel, iterations=iter)
+
+ return result.get()
+
+def convert_and_stack_masks(masks):
+ if len(masks) == 0:
+ return None
+
+ mask_tensors = []
+ for mask in masks:
+ mask_array = np.array(mask, dtype=np.uint8)
+ mask_tensor = torch.from_numpy(mask_array)
+ mask_tensors.append(mask_tensor)
+
+ stacked_masks = torch.stack(mask_tensors, dim=0)
+ stacked_masks = stacked_masks.unsqueeze(1)
+
+ return stacked_masks
+
+def merge_and_stack_masks(stacked_masks, group_size):
+ if stacked_masks is None:
+ return None
+
+ num_masks = stacked_masks.size(0)
+ merged_masks = []
+
+ for i in range(0, num_masks, group_size):
+ subset_masks = stacked_masks[i:i + group_size]
+ merged_mask = torch.any(subset_masks, dim=0)
+ merged_masks.append(merged_mask)
+
+ if len(merged_masks) > 0:
+ merged_masks = torch.stack(merged_masks, dim=0)
+
+ return merged_masks
+
+def make_sam_mask_segmented(sam_model, segs, image, detection_hint, dilation,
+ threshold, bbox_expansion, mask_hint_threshold, mask_hint_use_negative):
+ if sam_model.is_auto_mode:
+ device = model_management.get_torch_device()
+ sam_model.safe_to.to_device(sam_model, device=device)
+
+ try:
+ predictor = SamPredictor(sam_model)
+ image = np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8)
+ predictor.set_image(image, "RGB")
+
+ total_masks = []
+
+ use_small_negative = mask_hint_use_negative == "Small"
+
+ # seg_shape = segs[0]
+ segs = segs[1]
+ if detection_hint == "mask-points":
+ points = []
+ plabs = []
+
+ for i in range(len(segs)):
+ bbox = segs[i].bbox
+ center = center_of_bbox(bbox)
+ points.append(center)
+
+ # boxes narrower than 10px are used as background (negative) hints, the rest as foreground
+ if use_small_negative and bbox[2] - bbox[0] < 10:
+ plabs.append(0)
+ else:
+ plabs.append(1)
+
+ detected_masks = sam_predict(predictor, points, plabs, None, threshold)
+ total_masks += detected_masks
+
+ else:
+ for i in range(len(segs)):
+ bbox = segs[i].bbox
+ center = center_of_bbox(bbox)
+ x1 = max(bbox[0] - bbox_expansion, 0)
+ y1 = max(bbox[1] - bbox_expansion, 0)
+ x2 = min(bbox[2] + bbox_expansion, image.shape[1])
+ y2 = min(bbox[3] + bbox_expansion, image.shape[0])
+
+ dilated_bbox = [x1, y1, x2, y2]
+
+ points, plabs = generate_detection_hints(image, segs[i], center, detection_hint, dilated_bbox,
+ mask_hint_threshold, use_small_negative,
+ mask_hint_use_negative)
+
+ detected_masks = sam_predict(predictor, points, plabs, dilated_bbox, threshold)
+
+ total_masks += detected_masks
+
+ # merge every collected masks
+ mask = combine_masks2(total_masks)
+
+ finally:
+ if sam_model.is_auto_mode:
+ sam_model.cpu()
+
+ pass
+
+ mask_working_device = torch.device("cpu")
+
+ if mask is not None:
+ mask = mask.float()
+ mask = dilate_mask(mask.cpu().numpy(), dilation)
+ mask = torch.from_numpy(mask)
+ mask = mask.to(device=mask_working_device)
+ else:
+ # Extracting batch, height and width
+ height, width, _ = image.shape
+ mask = torch.zeros(
+ (height, width), dtype=torch.float32, device=mask_working_device
+ ) # empty mask
+
+ stacked_masks = convert_and_stack_masks(total_masks)
+
+ return (mask, merge_and_stack_masks(stacked_masks, group_size=3))
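+
+# Return value sketch: the first element is a single HxW float32 mask on CPU (all detected masks OR-ed together and
+# optionally dilated), or an all-zero mask if nothing was detected; the second element stacks the individual SAM
+# masks and merges them in groups of three via merge_and_stack_masks.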
+
+def tensor2mask(t: torch.Tensor) -> torch.Tensor:
+ size = t.size()
+ if (len(size) < 4):
+ return t
+ if size[3] == 1:
+ return t[:,:,:,0]
+ elif size[3] == 4:
+ # Use the alpha channel unless alpha is uniformly 1, in which case fall back to the RGB (grayscale) behaviour below
+ if torch.min(t[:, :, :, 3]).item() != 1.:
+ return t[:,:,:,3]
+ return TF.rgb_to_grayscale(tensor2rgb(t).permute(0,3,1,2), num_output_channels=1)[:,0,:,:]
+
+def tensor2rgb(t: torch.Tensor) -> torch.Tensor:
+ size = t.size()
+ if (len(size) < 4):
+ return t.unsqueeze(3).repeat(1, 1, 1, 3)
+ if size[3] == 1:
+ return t.repeat(1, 1, 1, 3)
+ elif size[3] == 4:
+ return t[:, :, :, :3]
+ else:
+ return t
+
+def tensor2rgba(t: torch.Tensor) -> torch.Tensor:
+ size = t.size()
+ if (len(size) < 4):
+ return t.unsqueeze(3).repeat(1, 1, 1, 4)
+ elif size[3] == 1:
+ return t.repeat(1, 1, 1, 4)
+ elif size[3] == 3:
+ alpha_tensor = torch.ones((size[0], size[1], size[2], 1))
+ return torch.cat((t, alpha_tensor), dim=3)
+ else:
+ return t
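+
+# Quick reference for the three converters above, for a channel-last tensor t of shape (B, H, W, C):
+# tensor2mask: C=1 -> (B, H, W); C=4 -> the alpha channel unless alpha is uniformly 1; otherwise grayscale of the RGB channels.
+# tensor2rgb: C=1 -> channel repeated to 3; C=4 -> the RGB channels; anything else returned unchanged.
+# tensor2rgba: C=1 -> channel repeated to 4; C=3 -> an opaque alpha channel appended; anything else returned unchanged.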
diff --git a/scripts/r_masking/segs.py b/scripts/r_masking/segs.py
new file mode 100644
index 0000000..60c22d7
--- /dev/null
+++ b/scripts/r_masking/segs.py
@@ -0,0 +1,22 @@
+def filter(segs, labels):
+ labels = set([label.strip() for label in labels])
+
+ if 'all' in labels:
+ return (segs, (segs[0], []), )
+ else:
+ res_segs = []
+ remained_segs = []
+
+ for x in segs[1]:
+ if x.label in labels:
+ res_segs.append(x)
+ elif 'eyes' in labels and x.label in ['left_eye', 'right_eye']:
+ res_segs.append(x)
+ elif 'eyebrows' in labels and x.label in ['left_eyebrow', 'right_eyebrow']:
+ res_segs.append(x)
+ elif 'pupils' in labels and x.label in ['left_pupil', 'right_pupil']:
+ res_segs.append(x)
+ else:
+ remained_segs.append(x)
+
+ return ((segs[0], res_segs), (segs[0], remained_segs), )
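+
+# Illustrative call: filter(segs, ['face', 'eyes']) returns a pair (matched, remainder), each shaped like the input
+# SEGS tuple (shape, [SEG, ...]); 'eyes', 'eyebrows' and 'pupils' act as aliases for their left_/right_ labels, and
+# 'all' short-circuits to returning everything.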
diff --git a/scripts/r_masking/subcore.py b/scripts/r_masking/subcore.py
new file mode 100644
index 0000000..cf7bf7d
--- /dev/null
+++ b/scripts/r_masking/subcore.py
@@ -0,0 +1,117 @@
+import numpy as np
+import cv2
+from PIL import Image
+
+import scripts.r_masking.core as core
+from reactor_utils import tensor_to_pil
+
+try:
+ from ultralytics import YOLO
+except Exception as e:
+ print(e)
+
+
+def load_yolo(model_path: str):
+ try:
+ return YOLO(model_path)
+ except ModuleNotFoundError:
+ # https://github.com/ultralytics/ultralytics/issues/3856
+ YOLO("yolov8n.pt")
+ return YOLO(model_path)
+
+def inference_bbox(
+ model,
+ image: Image.Image,
+ confidence: float = 0.3,
+ device: str = "",
+):
+ pred = model(image, conf=confidence, device=device)
+
+ bboxes = pred[0].boxes.xyxy.cpu().numpy()
+ cv2_image = np.array(image)
+ if len(cv2_image.shape) == 3:
+ cv2_image = cv2_image[:, :, ::-1].copy() # Convert RGB to BGR for cv2 processing
+ else:
+ # Handle the grayscale image here
+ # For example, you might want to convert it to a 3-channel grayscale image for consistency:
+ cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_GRAY2BGR)
+ cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)
+
+ segms = []
+ for x0, y0, x1, y1 in bboxes:
+ cv2_mask = np.zeros(cv2_gray.shape, np.uint8)
+ cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
+ cv2_mask_bool = cv2_mask.astype(bool)
+ segms.append(cv2_mask_bool)
+
+ n = bboxes.shape[0]
+ if n == 0:
+ return [[], [], [], []]
+
+ results = [[], [], [], []]
+ for i in range(len(bboxes)):
+ results[0].append(pred[0].names[int(pred[0].boxes[i].cls.item())])
+ results[1].append(bboxes[i])
+ results[2].append(segms[i])
+ results[3].append(pred[0].boxes[i].conf.cpu().numpy())
+
+ return results
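+
+# The return value is four parallel lists: [class names, xyxy bounding boxes, full-frame boolean masks with the
+# detection rectangle filled, confidences], one entry per detection; no detections yields [[], [], [], []].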
+
+
+class UltraBBoxDetector:
+ bbox_model = None
+
+ def __init__(self, bbox_model):
+ self.bbox_model = bbox_model
+
+ def detect(self, image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None):
+ drop_size = max(drop_size, 1)
+ detected_results = inference_bbox(self.bbox_model, tensor_to_pil(image), threshold)
+ segmasks = core.create_segmasks(detected_results)
+
+ if dilation > 0:
+ segmasks = core.dilate_masks(segmasks, dilation)
+
+ items = []
+ h = image.shape[1]
+ w = image.shape[2]
+
+ for x, label in zip(segmasks, detected_results[0]):
+ item_bbox = x[0]
+ item_mask = x[1]
+
+ y1, x1, y2, x2 = item_bbox
+
+ if x2 - x1 > drop_size and y2 - y1 > drop_size: # minimum dimension must be (2,2) to avoid squeeze issue
+ crop_region = core.make_crop_region(w, h, item_bbox, crop_factor)
+
+ if detailer_hook is not None:
+ crop_region = detailer_hook.post_crop_region(w, h, item_bbox, crop_region)
+
+ cropped_image = core.crop_image(image, crop_region)
+ cropped_mask = core.crop_ndarray2(item_mask, crop_region)
+ confidence = x[2]
+ # bbox_size = (item_bbox[2]-item_bbox[0],item_bbox[3]-item_bbox[1]) # (w,h)
+
+ item = core.SEG(cropped_image, cropped_mask, confidence, crop_region, item_bbox, label, None)
+
+ items.append(item)
+
+ shape = image.shape[1], image.shape[2]
+ segs = shape, items
+
+ if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
+ segs = detailer_hook.post_detection(segs)
+
+ return segs
+
+ def detect_combined(self, image, threshold, dilation):
+ detected_results = inference_bbox(self.bbox_model, tensor_to_pil(image), threshold)
+ segmasks = core.create_segmasks(detected_results)
+ if dilation > 0:
+ segmasks = core.dilate_masks(segmasks, dilation)
+
+ # combine_masks2 expects bare masks, so take the mask element from each (bbox, mask, confidence) tuple
+ return core.combine_masks2([m[1] for m in segmasks])
+
+ def setAux(self, x):
+ pass
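+
+# Minimal usage sketch (the model filename is hypothetical; any ultralytics bbox detector works):
+# detector = UltraBBoxDetector(load_yolo("face_yolov8m.pt"))
+# segs = detector.detect(image, threshold=0.5, dilation=4, crop_factor=3.0)
+# where `image` is a (1, H, W, C) image tensor and `segs` is ((H, W), [SEG, ...]) ready for the helpers in core.py.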
diff --git a/scripts/reactor_faceswap.py b/scripts/reactor_faceswap.py
new file mode 100644
index 0000000..7e6c03e
--- /dev/null
+++ b/scripts/reactor_faceswap.py
@@ -0,0 +1,185 @@
+import os, glob
+
+from PIL import Image
+
+# from modules.upscaler import Upscaler, UpscalerData
+from modules import scripts, scripts_postprocessing
+from modules.processing import (
+ StableDiffusionProcessing,
+ StableDiffusionProcessingImg2Img,
+)
+from modules.shared import state
+from scripts.reactor_logger import logger
+from scripts.reactor_swapper import (
+ swap_face,
+ swap_face_many,
+ get_current_faces_model,
+ analyze_faces,
+ half_det_size,
+ providers
+)
+import folder_paths
+import comfy.model_management as model_management
+
+
+def get_models():
+ models_path = os.path.join(folder_paths.models_dir,"insightface/*")
+ models = glob.glob(models_path)
+ models = [x for x in models if x.endswith(".onnx") or x.endswith(".pth")]
+ return models
+
+
+class FaceSwapScript(scripts.Script):
+
+ def process(
+ self,
+ p: StableDiffusionProcessing,
+ img,
+ enable,
+ source_faces_index,
+ faces_index,
+ model,
+ swap_in_source,
+ swap_in_generated,
+ gender_source,
+ gender_target,
+ face_model,
+ faces_order,
+ face_boost_enabled,
+ face_restore_model,
+ face_restore_visibility,
+ codeformer_weight,
+ interpolation,
+ ):
+ self.enable = enable
+ if self.enable:
+
+ self.source = img
+ self.swap_in_generated = swap_in_generated
+ self.gender_source = gender_source
+ self.gender_target = gender_target
+ self.model = model
+ self.face_model = face_model
+ self.faces_order = faces_order
+ self.face_boost_enabled = face_boost_enabled
+ self.face_restore_model = face_restore_model
+ self.face_restore_visibility = face_restore_visibility
+ self.codeformer_weight = codeformer_weight
+ self.interpolation = interpolation
+ self.source_faces_index = [
+ int(x) for x in source_faces_index.strip(",").split(",") if x.isnumeric()
+ ]
+ self.faces_index = [
+ int(x) for x in faces_index.strip(",").split(",") if x.isnumeric()
+ ]
+ if len(self.source_faces_index) == 0:
+ self.source_faces_index = [0]
+ if len(self.faces_index) == 0:
+ self.faces_index = [0]
+
+ if self.gender_source is None or self.gender_source == "no":
+ self.gender_source = 0
+ elif self.gender_source == "female":
+ self.gender_source = 1
+ elif self.gender_source == "male":
+ self.gender_source = 2
+
+ if self.gender_target is None or self.gender_target == "no":
+ self.gender_target = 0
+ elif self.gender_target == "female":
+ self.gender_target = 1
+ elif self.gender_target == "male":
+ self.gender_target = 2
+
+ # if self.source is not None:
+ if isinstance(p, StableDiffusionProcessingImg2Img) and swap_in_source:
+ logger.status(f"Working: source face index %s, target face index %s", self.source_faces_index, self.faces_index)
+
+ if len(p.init_images) == 1:
+
+ result = swap_face(
+ self.source,
+ p.init_images[0],
+ source_faces_index=self.source_faces_index,
+ faces_index=self.faces_index,
+ model=self.model,
+ gender_source=self.gender_source,
+ gender_target=self.gender_target,
+ face_model=self.face_model,
+ faces_order=self.faces_order,
+ face_boost_enabled=self.face_boost_enabled,
+ face_restore_model=self.face_restore_model,
+ face_restore_visibility=self.face_restore_visibility,
+ codeformer_weight=self.codeformer_weight,
+ interpolation=self.interpolation,
+ )
+ p.init_images[0] = result
+
+ # for i in range(len(p.init_images)):
+ # if state.interrupted or model_management.processing_interrupted():
+ # logger.status("Interrupted by User")
+ # break
+ # if len(p.init_images) > 1:
+ # logger.status(f"Swap in %s", i)
+ # result = swap_face(
+ # self.source,
+ # p.init_images[i],
+ # source_faces_index=self.source_faces_index,
+ # faces_index=self.faces_index,
+ # model=self.model,
+ # gender_source=self.gender_source,
+ # gender_target=self.gender_target,
+ # face_model=self.face_model,
+ # )
+ # p.init_images[i] = result
+
+ elif len(p.init_images) > 1:
+ result = swap_face_many(
+ self.source,
+ p.init_images,
+ source_faces_index=self.source_faces_index,
+ faces_index=self.faces_index,
+ model=self.model,
+ gender_source=self.gender_source,
+ gender_target=self.gender_target,
+ face_model=self.face_model,
+ faces_order=self.faces_order,
+ face_boost_enabled=self.face_boost_enabled,
+ face_restore_model=self.face_restore_model,
+ face_restore_visibility=self.face_restore_visibility,
+ codeformer_weight=self.codeformer_weight,
+ interpolation=self.interpolation,
+ )
+ p.init_images = result
+
+ logger.status("--Done!--")
+ # else:
+ # logger.error(f"Please provide a source face")
+
+ def postprocess_batch(self, p, *args, **kwargs):
+ if self.enable:
+ images = kwargs["images"]
+
+ def postprocess_image(self, p, script_pp: scripts.PostprocessImageArgs, *args):
+ if self.enable and self.swap_in_generated:
+ if self.source is not None:
+ logger.status("Working: source face index %s, target face index %s", self.source_faces_index, self.faces_index)
+ image: Image.Image = script_pp.image
+ result = swap_face(
+ self.source,
+ image,
+ source_faces_index=self.source_faces_index,
+ faces_index=self.faces_index,
+ model=self.model,
+ gender_source=self.gender_source,
+ gender_target=self.gender_target,
+ )
+ try:
+ pp = scripts_postprocessing.PostprocessedImage(result)
+ pp.info = {}
+ p.extra_generation_params.update(pp.info)
+ script_pp.image = pp.image
+ except Exception:
+ logger.error("Cannot create a result image")
diff --git a/scripts/reactor_logger.py b/scripts/reactor_logger.py
new file mode 100644
index 0000000..f64e433
--- /dev/null
+++ b/scripts/reactor_logger.py
@@ -0,0 +1,47 @@
+import logging
+import copy
+import sys
+
+from modules import shared
+from reactor_utils import addLoggingLevel
+
+
+class ColoredFormatter(logging.Formatter):
+ COLORS = {
+ "DEBUG": "\033[0;36m", # CYAN
+ "STATUS": "\033[38;5;173m", # Calm ORANGE
+ "INFO": "\033[0;32m", # GREEN
+ "WARNING": "\033[0;33m", # YELLOW
+ "ERROR": "\033[0;31m", # RED
+ "CRITICAL": "\033[0;37;41m", # WHITE ON RED
+ "RESET": "\033[0m", # RESET COLOR
+ }
+
+ def format(self, record):
+ colored_record = copy.copy(record)
+ levelname = colored_record.levelname
+ seq = self.COLORS.get(levelname, self.COLORS["RESET"])
+ colored_record.levelname = f"{seq}{levelname}{self.COLORS['RESET']}"
+ return super().format(colored_record)
+
+
+# Create a new logger
+logger = logging.getLogger("ReActor")
+logger.propagate = False
+
+# Add Custom Level
+# logging.addLevelName(logging.INFO, "STATUS")
+addLoggingLevel("STATUS", logging.INFO + 5)
+
+# Add handler if we don't have one.
+if not logger.handlers:
+ handler = logging.StreamHandler(sys.stdout)
+ handler.setFormatter(
+ ColoredFormatter("[%(name)s] %(asctime)s - %(levelname)s - %(message)s",datefmt="%H:%M:%S")
+ )
+ logger.addHandler(handler)
+
+# Configure logger
+loglevel_string = getattr(shared.cmd_opts, "reactor_loglevel", "INFO")
+loglevel = getattr(logging, loglevel_string.upper(), "info")
+logger.setLevel(loglevel)
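+
+# Usage note: the custom STATUS level sits at logging.INFO + 5, so logger.status("message") is still emitted when the
+# level is set to INFO, and it is rendered in the orange colour defined in ColoredFormatter.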
diff --git a/scripts/reactor_swapper.py b/scripts/reactor_swapper.py
new file mode 100644
index 0000000..6db5dfc
--- /dev/null
+++ b/scripts/reactor_swapper.py
@@ -0,0 +1,572 @@
+import os
+import shutil
+from typing import List, Union
+
+import cv2
+import numpy as np
+from PIL import Image
+
+import insightface
+from insightface.app.common import Face
+# try:
+# import torch.cuda as cuda
+# except:
+# cuda = None
+import torch
+
+import folder_paths
+import comfy.model_management as model_management
+from modules.shared import state
+
+from scripts.reactor_logger import logger
+from reactor_utils import (
+ move_path,
+ get_image_md5hash,
+)
+from scripts.r_faceboost import swapper, restorer
+
+import warnings
+
+# newer NumPy releases removed the np.warnings alias; re-attach it so third-party code that references np.warnings keeps working
+np.warnings = warnings
+np.warnings.filterwarnings('ignore')
+
+# PROVIDERS
+try:
+ if torch.cuda.is_available():
+ providers = ["CUDAExecutionProvider"]
+ elif torch.backends.mps.is_available():
+ providers = ["CoreMLExecutionProvider"]
+ elif hasattr(torch,'dml') or hasattr(torch,'privateuseone'):
+ providers = ["ROCMExecutionProvider"]
+ else:
+ providers = ["CPUExecutionProvider"]
+except Exception as e:
+ logger.debug(f"ExecutionProviderError: {e}.\nEP is set to CPU.")
+ providers = ["CPUExecutionProvider"]
+# if cuda is not None:
+# if cuda.is_available():
+# providers = ["CUDAExecutionProvider"]
+# else:
+# providers = ["CPUExecutionProvider"]
+# else:
+# providers = ["CPUExecutionProvider"]
+
+models_path_old = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
+insightface_path_old = os.path.join(models_path_old, "insightface")
+insightface_models_path_old = os.path.join(insightface_path_old, "models")
+
+models_path = folder_paths.models_dir
+insightface_path = os.path.join(models_path, "insightface")
+insightface_models_path = os.path.join(insightface_path, "models")
+
+if os.path.exists(models_path_old):
+ move_path(insightface_models_path_old, insightface_models_path)
+ move_path(insightface_path_old, insightface_path)
+ move_path(models_path_old, models_path)
+if os.path.exists(insightface_path) and os.path.exists(insightface_path_old):
+ shutil.rmtree(insightface_path_old)
+ shutil.rmtree(models_path_old)
+
+
+FS_MODEL = None
+CURRENT_FS_MODEL_PATH = None
+
+ANALYSIS_MODELS = {
+ "640": None,
+ "320": None,
+}
+
+SOURCE_FACES = None
+SOURCE_IMAGE_HASH = None
+TARGET_FACES = None
+TARGET_IMAGE_HASH = None
+TARGET_FACES_LIST = []
+TARGET_IMAGE_LIST_HASH = []
+
+def unload_model(model):
+ if model is not None:
+ # check if model has unload method
+ # if "unload" in model:
+ # model.unload()
+ # if "model_unload" in model:
+ # model.model_unload()
+ del model
+ return None
+
+def unload_all_models():
+ global FS_MODEL, CURRENT_FS_MODEL_PATH
+ FS_MODEL = unload_model(FS_MODEL)
+ ANALYSIS_MODELS["320"] = unload_model(ANALYSIS_MODELS["320"])
+ ANALYSIS_MODELS["640"] = unload_model(ANALYSIS_MODELS["640"])
+
+def get_current_faces_model():
+ global SOURCE_FACES
+ return SOURCE_FACES
+
+def getAnalysisModel(det_size = (640, 640)):
+ global ANALYSIS_MODELS
+ ANALYSIS_MODEL = ANALYSIS_MODELS[str(det_size[0])]
+ if ANALYSIS_MODEL is None:
+ ANALYSIS_MODEL = insightface.app.FaceAnalysis(
+ name="buffalo_l", providers=providers, root=insightface_path
+ )
+ ANALYSIS_MODEL.prepare(ctx_id=0, det_size=det_size)
+ ANALYSIS_MODELS[str(det_size[0])] = ANALYSIS_MODEL
+ return ANALYSIS_MODEL
+
+def getFaceSwapModel(model_path: str):
+ global FS_MODEL, CURRENT_FS_MODEL_PATH
+ if FS_MODEL is None or CURRENT_FS_MODEL_PATH is None or CURRENT_FS_MODEL_PATH != model_path:
+ CURRENT_FS_MODEL_PATH = model_path
+ FS_MODEL = unload_model(FS_MODEL)
+ FS_MODEL = insightface.model_zoo.get_model(model_path, providers=providers)
+
+ return FS_MODEL
+
+
+def sort_by_order(face, order: str):
+ if order == "left-right":
+ return sorted(face, key=lambda x: x.bbox[0])
+ if order == "right-left":
+ return sorted(face, key=lambda x: x.bbox[0], reverse = True)
+ if order == "top-bottom":
+ return sorted(face, key=lambda x: x.bbox[1])
+ if order == "bottom-top":
+ return sorted(face, key=lambda x: x.bbox[1], reverse = True)
+ if order == "small-large":
+ return sorted(face, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))
+ # if order == "large-small":
+ # return sorted(face, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]), reverse = True)
+ # by default "large-small":
+ return sorted(face, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]), reverse = True)
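+
+# Ordering cheatsheet: "left-right"/"right-left" sort by the bbox x-coordinate, "top-bottom"/"bottom-top" by the
+# y-coordinate, "small-large" by bbox area ascending, and anything else (including "large-small") by area descending.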
+
+def get_face_gender(
+ face,
+ face_index,
+ gender_condition,
+ operated: str,
+ order: str,
+):
+ gender = [
+ x.sex
+ for x in face
+ ]
+ gender.reverse()
+ # If the index is out of bounds, return None instead of raising an exception
+ if face_index >= len(gender):
+ logger.status("Requested face index (%s) is out of bounds (only %s face(s) detected)", face_index, len(gender))
+ return None, 0
+ face_gender = gender[face_index]
+ logger.status("%s Face %s: Detected Gender -%s-", operated, face_index, face_gender)
+ if (gender_condition == 1 and face_gender == "F") or (gender_condition == 2 and face_gender == "M"):
+ logger.status("OK - Detected Gender matches Condition")
+ try:
+ faces_sorted = sort_by_order(face, order)
+ return faces_sorted[face_index], 0
+ # return sorted(face, key=lambda x: x.bbox[0])[face_index], 0
+ except IndexError:
+ return None, 0
+ else:
+ logger.status("WRONG - Detected Gender doesn't match Condition")
+ faces_sorted = sort_by_order(face, order)
+ return faces_sorted[face_index], 1
+ # return sorted(face, key=lambda x: x.bbox[0])[face_index], 1
+
+def half_det_size(det_size):
+ logger.status("Trying to halve 'det_size' parameter")
+ return (det_size[0] // 2, det_size[1] // 2)
+
+def analyze_faces(img_data: np.ndarray, det_size=(640, 640)):
+ face_analyser = getAnalysisModel(det_size)
+ faces = face_analyser.get(img_data)
+
+ # Try halving det_size if no faces are found
+ if len(faces) == 0 and det_size[0] > 320 and det_size[1] > 320:
+ det_size_half = half_det_size(det_size)
+ return analyze_faces(img_data, det_size_half)
+
+ return faces
+
+def get_face_single(img_data: np.ndarray, face, face_index=0, det_size=(640, 640), gender_source=0, gender_target=0, order="large-small"):
+
+ buffalo_path = os.path.join(insightface_models_path, "buffalo_l.zip")
+ if os.path.exists(buffalo_path):
+ os.remove(buffalo_path)
+
+ if gender_source != 0:
+ if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+ det_size_half = half_det_size(det_size)
+ return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target, order)
+ return get_face_gender(face,face_index,gender_source,"Source", order)
+
+ if gender_target != 0:
+ if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+ det_size_half = half_det_size(det_size)
+ return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target, order)
+ return get_face_gender(face,face_index,gender_target,"Target", order)
+
+ if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+ det_size_half = half_det_size(det_size)
+ return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target, order)
+
+ try:
+ faces_sorted = sort_by_order(face, order)
+ return faces_sorted[face_index], 0
+ # return sorted(face, key=lambda x: x.bbox[0])[face_index], 0
+ except IndexError:
+ return None, 0
+
+
+def swap_face(
+ source_img: Union[Image.Image, None],
+ target_img: Image.Image,
+ model: Union[str, None] = None,
+ source_faces_index: List[int] = [0],
+ faces_index: List[int] = [0],
+ gender_source: int = 0,
+ gender_target: int = 0,
+ face_model: Union[Face, None] = None,
+ faces_order: List = ["large-small", "large-small"],
+ face_boost_enabled: bool = False,
+ face_restore_model = None,
+ face_restore_visibility: int = 1,
+ codeformer_weight: float = 0.5,
+ interpolation: str = "Bicubic",
+):
+ global SOURCE_FACES, SOURCE_IMAGE_HASH, TARGET_FACES, TARGET_IMAGE_HASH
+ result_image = target_img
+
+ if model is not None:
+
+ if isinstance(source_img, str): # source_img is a base64 string
+ import base64, io
+ if 'base64,' in source_img: # check if the base64 string has a data URL scheme
+ # split the base64 string to get the actual base64 encoded image data
+ base64_data = source_img.split('base64,')[-1]
+ # decode base64 string to bytes
+ img_bytes = base64.b64decode(base64_data)
+ else:
+ # if no data URL scheme, just decode
+ img_bytes = base64.b64decode(source_img)
+
+ source_img = Image.open(io.BytesIO(img_bytes))
+
+ target_img = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
+
+ if source_img is not None:
+
+ source_img = cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)
+
+ source_image_md5hash = get_image_md5hash(source_img)
+
+ if SOURCE_IMAGE_HASH is None:
+ SOURCE_IMAGE_HASH = source_image_md5hash
+ source_image_same = False
+ else:
+ source_image_same = SOURCE_IMAGE_HASH == source_image_md5hash
+ if not source_image_same:
+ SOURCE_IMAGE_HASH = source_image_md5hash
+
+ logger.info("Source Image MD5 Hash = %s", SOURCE_IMAGE_HASH)
+ logger.info("Source Image the Same? %s", source_image_same)
+
+ if SOURCE_FACES is None or not source_image_same:
+ logger.status("Analyzing Source Image...")
+ source_faces = analyze_faces(source_img)
+ SOURCE_FACES = source_faces
+ elif source_image_same:
+ logger.status("Using Hashed Source Face(s) Model...")
+ source_faces = SOURCE_FACES
+
+ elif face_model is not None:
+
+ source_faces_index = [0]
+ logger.status("Using Loaded Source Face Model...")
+ source_face_model = [face_model]
+ source_faces = source_face_model
+
+ else:
+ logger.error("Cannot detect any Source")
+
+ if source_faces is not None:
+
+ target_image_md5hash = get_image_md5hash(target_img)
+
+ if TARGET_IMAGE_HASH is None:
+ TARGET_IMAGE_HASH = target_image_md5hash
+ target_image_same = False
+ else:
+ target_image_same = TARGET_IMAGE_HASH == target_image_md5hash
+ if not target_image_same:
+ TARGET_IMAGE_HASH = target_image_md5hash
+
+ logger.info("Target Image MD5 Hash = %s", TARGET_IMAGE_HASH)
+ logger.info("Target Image the Same? %s", target_image_same)
+
+ if TARGET_FACES is None or not target_image_same:
+ logger.status("Analyzing Target Image...")
+ target_faces = analyze_faces(target_img)
+ TARGET_FACES = target_faces
+ elif target_image_same:
+ logger.status("Using Hashed Target Face(s) Model...")
+ target_faces = TARGET_FACES
+
+ # No use in trying to swap faces if no faces are found, enhancement
+ if len(target_faces) == 0:
+ logger.status("Cannot detect any Target, skipping swapping...")
+ return result_image
+
+ if source_img is not None:
+ # separated management of wrong_gender between source and target, enhancement
+ source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[0], gender_source=gender_source, order=faces_order[1])
+ else:
+ # source_face = sorted(source_faces, key=lambda x: x.bbox[0])[source_faces_index[0]]
+ source_face = sorted(source_faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]), reverse = True)[source_faces_index[0]]
+ src_wrong_gender = 0
+
+ if len(source_faces_index) != 0 and len(source_faces_index) != 1 and len(source_faces_index) != len(faces_index):
+ logger.status('Source Faces must have no entries (default=0), one entry, or the same number of entries as target faces.')
+ elif source_face is not None:
+ result = target_img
+ model_path = os.path.join(insightface_path, model)
+ face_swapper = getFaceSwapModel(model_path)
+
+ source_face_idx = 0
+
+ for face_num in faces_index:
+ # No use in trying to swap faces if no further faces are found, enhancement
+ if face_num >= len(target_faces):
+ logger.status("Checked all existing target faces, skipping swapping...")
+ break
+
+ if len(source_faces_index) > 1 and source_face_idx > 0:
+ source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[source_face_idx], gender_source=gender_source, order=faces_order[1])
+ source_face_idx += 1
+
+ if source_face is not None and src_wrong_gender == 0:
+ target_face, wrong_gender = get_face_single(target_img, target_faces, face_index=face_num, gender_target=gender_target, order=faces_order[0])
+ if target_face is not None and wrong_gender == 0:
+ logger.status(f"Swapping...")
+ if face_boost_enabled:
+ logger.status(f"Face Boost is enabled")
+ bgr_fake, M = face_swapper.get(result, target_face, source_face, paste_back=False)
+ bgr_fake, scale = restorer.get_restored_face(bgr_fake, face_restore_model, face_restore_visibility, codeformer_weight, interpolation)
+ M *= scale
+ result = swapper.in_swap(target_img, bgr_fake, M)
+ else:
+ # logger.status(f"Swapping as-is")
+ result = face_swapper.get(result, target_face, source_face)
+ elif wrong_gender == 1:
+ wrong_gender = 0
+ # Keep searching for other faces if wrong gender is detected, enhancement
+ #if source_face_idx == len(source_faces_index):
+ # result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+ # return result_image
+ logger.status("Wrong target gender detected")
+ continue
+ else:
+ logger.status(f"No target face found for {face_num}")
+ elif src_wrong_gender == 1:
+ src_wrong_gender = 0
+ # Keep searching for other faces if wrong gender is detected, enhancement
+ #if source_face_idx == len(source_faces_index):
+ # result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+ # return result_image
+ logger.status("Wrong source gender detected")
+ continue
+ else:
+ logger.status(f"No source face found for face number {source_face_idx}.")
+
+ result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+
+ else:
+ logger.status("No source face(s) in the provided Index")
+ else:
+ logger.status("No source face(s) found")
+ return result_image
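+
+# Illustrative call (the swapper filename is hypothetical; the model must exist under models/insightface):
+# result = swap_face(source_pil, target_pil, model="inswapper_128.onnx", faces_index=[0])
+# Both images are PIL images (the source may also be a base64 string); the result is a PIL image, returned unchanged
+# when no model, source face or target face is available.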
+
+def swap_face_many(
+ source_img: Union[Image.Image, None],
+ target_imgs: List[Image.Image],
+ model: Union[str, None] = None,
+ source_faces_index: List[int] = [0],
+ faces_index: List[int] = [0],
+ gender_source: int = 0,
+ gender_target: int = 0,
+ face_model: Union[Face, None] = None,
+ faces_order: List = ["large-small", "large-small"],
+ face_boost_enabled: bool = False,
+ face_restore_model = None,
+ face_restore_visibility: int = 1,
+ codeformer_weight: float = 0.5,
+ interpolation: str = "Bicubic",
+):
+ global SOURCE_FACES, SOURCE_IMAGE_HASH, TARGET_FACES, TARGET_IMAGE_HASH, TARGET_FACES_LIST, TARGET_IMAGE_LIST_HASH
+ result_images = target_imgs
+
+ if model is not None:
+
+ if isinstance(source_img, str): # source_img is a base64 string
+ import base64, io
+ if 'base64,' in source_img: # check if the base64 string has a data URL scheme
+ # split the base64 string to get the actual base64 encoded image data
+ base64_data = source_img.split('base64,')[-1]
+ # decode base64 string to bytes
+ img_bytes = base64.b64decode(base64_data)
+ else:
+ # if no data URL scheme, just decode
+ img_bytes = base64.b64decode(source_img)
+
+ source_img = Image.open(io.BytesIO(img_bytes))
+
+ target_imgs = [cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR) for target_img in target_imgs]
+
+ if source_img is not None:
+
+ source_img = cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)
+
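+ # Cache face analysis across calls: hash the source image and rerun the
+ # detector only when the hash differs from the previous invocation.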
+ source_image_md5hash = get_image_md5hash(source_img)
+
+ if SOURCE_IMAGE_HASH is None:
+ SOURCE_IMAGE_HASH = source_image_md5hash
+ source_image_same = False
+ else:
+ source_image_same = SOURCE_IMAGE_HASH == source_image_md5hash
+ if not source_image_same:
+ SOURCE_IMAGE_HASH = source_image_md5hash
+
+ logger.info("Source Image MD5 Hash = %s", SOURCE_IMAGE_HASH)
+ logger.info("Source Image the Same? %s", source_image_same)
+
+ if SOURCE_FACES is None or not source_image_same:
+ logger.status("Analyzing Source Image...")
+ source_faces = analyze_faces(source_img)
+ SOURCE_FACES = source_faces
+ elif source_image_same:
+ logger.status("Using Hashed Source Face(s) Model...")
+ source_faces = SOURCE_FACES
+
+ elif face_model is not None:
+
+ source_faces_index = [0]
+ logger.status("Using Loaded Source Face Model...")
+ source_face_model = [face_model]
+ source_faces = source_face_model
+
+ else:
+ logger.error("Cannot detect any Source")
+
+ if source_faces is not None:
+
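+ # Per-image cache for the batch: TARGET_IMAGE_LIST_HASH and TARGET_FACES_LIST
+ # stay index-aligned with target_imgs so unchanged frames reuse their
+ # previously analyzed faces.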
+ target_faces = []
+ for i, target_img in enumerate(target_imgs):
+ if state.interrupted or model_management.processing_interrupted():
+ logger.status("Interrupted by User")
+ break
+
+ target_image_md5hash = get_image_md5hash(target_img)
+ if len(TARGET_IMAGE_LIST_HASH) == 0:
+ TARGET_IMAGE_LIST_HASH = [target_image_md5hash]
+ target_image_same = False
+ elif len(TARGET_IMAGE_LIST_HASH) == i:
+ TARGET_IMAGE_LIST_HASH.append(target_image_md5hash)
+ target_image_same = False
+ else:
+ target_image_same = TARGET_IMAGE_LIST_HASH[i] == target_image_md5hash
+ if not target_image_same:
+ TARGET_IMAGE_LIST_HASH[i] = target_image_md5hash
+
+ logger.info("(Image %s) Target Image MD5 Hash = %s", i, TARGET_IMAGE_LIST_HASH[i])
+ logger.info("(Image %s) Target Image the Same? %s", i, target_image_same)
+
+ if len(TARGET_FACES_LIST) == 0:
+ logger.status(f"Analyzing Target Image {i}...")
+ target_face = analyze_faces(target_img)
+ TARGET_FACES_LIST = [target_face]
+ elif len(TARGET_FACES_LIST) == i and not target_image_same:
+ logger.status(f"Analyzing Target Image {i}...")
+ target_face = analyze_faces(target_img)
+ TARGET_FACES_LIST.append(target_face)
+ elif len(TARGET_FACES_LIST) != i and not target_image_same:
+ logger.status(f"Analyzing Target Image {i}...")
+ target_face = analyze_faces(target_img)
+ TARGET_FACES_LIST[i] = target_face
+ elif target_image_same:
+ logger.status("(Image %s) Using Hashed Target Face(s) Model...", i)
+ target_face = TARGET_FACES_LIST[i]
+
+ if target_face is not None:
+ target_faces.append(target_face)
+
+ # No point in attempting a swap when no target faces were detected
+ if len(target_faces) == 0:
+ logger.status("Cannot detect any Target, skipping swapping...")
+ return result_images
+
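+ # Pick the initial source face: from the analyzed source image when one was
+ # provided, otherwise the largest face (by bounding-box area) from the loaded
+ # face model.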
+ if source_img is not None:
+ # wrong_gender is tracked separately for the source and the target faces
+ source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[0], gender_source=gender_source, order=faces_order[1])
+ else:
+ # source_face = sorted(source_faces, key=lambda x: x.bbox[0])[source_faces_index[0]]
+ source_face = sorted(source_faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]), reverse=True)[source_faces_index[0]]
+ src_wrong_gender = 0
+
+ if len(source_faces_index) != 0 and len(source_faces_index) != 1 and len(source_faces_index) != len(faces_index):
+ logger.status('Source Faces Index must have no entries (default=0), one entry, or the same number of entries as the target Faces Index.')
+ elif source_face is not None:
+ results = target_imgs
+ model_path = os.path.join(insightface_path, model)
+ face_swapper = getFaceSwapModel(model_path)
+
+ source_face_idx = 0
+
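+ # Same per-index loop as in swap_face, except each swap is applied to every
+ # image in the batch, building on the previous results.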
+ for face_num in faces_index:
+ # Stop once the requested index exceeds the number of detected target faces
+ if face_num >= len(target_faces):
+ logger.status("Checked all existing target faces, skipping swapping...")
+ break
+
+ if len(source_faces_index) > 1 and source_face_idx > 0:
+ source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[source_face_idx], gender_source=gender_source, order=faces_order[1])
+ source_face_idx += 1
+
+ if source_face is not None and src_wrong_gender == 0:
+ # Apply the current swap on top of each image's previous swap result
+ for i, (target_img, target_face) in enumerate(zip(results, target_faces)):
+ target_face_single, wrong_gender = get_face_single(target_img, target_face, face_index=face_num, gender_target=gender_target, order=faces_order[0])
+ if target_face_single is not None and wrong_gender == 0:
+ result = target_img
+ logger.status(f"Swapping {i}...")
+ if face_boost_enabled:
+ logger.status(f"Face Boost is enabled")
+ bgr_fake, M = face_swapper.get(target_img, target_face_single, source_face, paste_back=False)
+ bgr_fake, scale = restorer.get_restored_face(bgr_fake, face_restore_model, face_restore_visibility, codeformer_weight, interpolation)
+ M *= scale
+ result = swapper.in_swap(target_img, bgr_fake, M)
+ else:
+ # logger.status(f"Swapping as-is")
+ result = face_swapper.get(target_img, target_face_single, source_face)
+ results[i] = result
+ elif wrong_gender == 1:
+ wrong_gender = 0
+ logger.status("Wrong target gender detected")
+ continue
+ else:
+ logger.status(f"No target face found for {face_num}")
+ elif src_wrong_gender == 1:
+ src_wrong_gender = 0
+ logger.status("Wrong source gender detected")
+ continue
+ else:
+ logger.status(f"No source face found for face number {source_face_idx}.")
+
+ result_images = [Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB)) for result in results]
+
+ else:
+ logger.status("No source face(s) in the provided Index")
+ else:
+ logger.status("No source face(s) found")
+ return result_images
diff --git a/scripts/reactor_version.py b/scripts/reactor_version.py
new file mode 100644
index 0000000..b4e6267
--- /dev/null
+++ b/scripts/reactor_version.py
@@ -0,0 +1,13 @@
+app_title = "ReActor Node for ComfyUI"
+version_flag = "v0.5.2-a2"
+
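+# ANSI escape sequences used to colorize the startup banner below; "0" resets the color.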
+COLORS = {
+ "CYAN": "\033[0;36m", # CYAN
+ "ORANGE": "\033[38;5;173m", # Calm ORANGE
+ "GREEN": "\033[0;32m", # GREEN
+ "YELLOW": "\033[0;33m", # YELLOW
+ "RED": "\033[0;91m", # RED
+ "0": "\033[0m", # RESET COLOR
+}
+
+print(f"{COLORS['YELLOW']}[ReActor]{COLORS['0']} - {COLORS['ORANGE']}STATUS{COLORS['0']} - {COLORS['GREEN']}Running {version_flag} in ComfyUI{COLORS['0']}")