| | |
| | |
| | |
| | |
| | |
| |
|
| | from transformers import PretrainedConfig |
| |
|
| |
|
| | class InternImageConfig(PretrainedConfig): |
| | r""" |
| | This is the configuration class to store the configuration of a [`~InternImageModel`]. |
| | It is used to instantiate an internimage model according to the specified arguments, defining the model |
| | architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of |
| | the internimage [OpenGVLab/internimage](https://huggingface.co/OpenGVLab/internimage) architecture. |
| | |
| | Configuration objects inherit from [`PretrainedConfig`] and can be used |
| | to control the model outputs. Read the documentation from [`PretrainedConfig`] |
| | for more information. |
| | |
| | Args: |
| | core_op (`str`, *optional*, defaults to `"DCNv3"`): |
| | Core operation used in the InternImageModel. |
| | depths (`tuple`, *optional*, defaults to `(4, 4, 18, 4)`): |
| | Tuple specifying the depth of layers in the InternImageModel. |
| | groups (`tuple`, *optional*, defaults to `(4, 8, 16, 32)`): |
| | Tuple specifying the group of layers in the InternImageModel. |
| | channels (`int`, *optional*, defaults to `64`): |
| | Number of channels in the InternImageModel. |
| | dw_kernel_size (`int`, *optional*, defaults to `None`): |
| | Kernel size for depthwise convolutions. |
| | layer_scale (`float`, *optional*, defaults to `None`): |
| | Scale of the layers in the model. |
| | offset_scale (`float`, *optional*, defaults to `1.0`): |
| | Offset scale in the model. |
| | mlp_ratio (`float`, *optional*, defaults to `4.0`): |
| | Ratio of mlp layers in the InternImageModel. |
| | post_norm (`bool`, *optional*, defaults to `False`): |
| | Whether to use post normalization in the model. |
| | level2_post_norm (`bool`, *optional*, defaults to `False`): |
| | Whether to use level 2 post normalization. |
| | level2_post_norm_block_ids (`list`, *optional*, defaults to `None`): |
| | Specific block IDs for level 2 post normalization. |
| | center_feature_scale (`bool`, *optional*, defaults to `False`): |
| | Whether to apply center feature scaling. |
| | use_clip_projector (`bool`, *optional*, defaults to `False`): |
| | Whether to use CLIP projector. |
| | remove_center (`bool`, *optional*, defaults to `False`): |
| | Whether to remove center pixels in some operations. |
| | num_classes (`int`, *optional*, defaults to `1000`): |
| | Number of classes for the model output. |
| | drop_rate (`float`, *optional*, defaults to `0.0`): |
| | Dropout rate in the model. |
| | drop_path_rate (`float`, *optional*, defaults to `0.0`): |
| | Dropout path rate in the model. |
| | drop_path_type (`str`, *optional*, defaults to `"linear"`): |
| | Type of dropout path used in the model. |
| | act_layer (`str`, *optional*, defaults to `"GELU"`): |
| | Activation function used in the model. |
| | norm_layer (`str`, *optional*, defaults to `"LN"`): |
| | Normalization layer used in the model. |
| | cls_scale (`float`, *optional*, defaults to `1.5`): |
| | Scale of the classification layer in the model. |
| | with_cp (`bool`, *optional*, defaults to `False`): |
| | Whether to use checkpointing in the model. |
| | """ |
| | model_type = 'internimage' |
| |
|
| | def __init__( |
| | self, |
| | core_op='DCNv3', |
| | depths=(4, 4, 18, 4), |
| | groups=(4, 8, 16, 32), |
| | channels=64, |
| | dw_kernel_size=None, |
| | layer_scale=None, |
| | offset_scale=1.0, |
| | mlp_ratio=4.0, |
| | post_norm=False, |
| | res_post_norm=False, |
| | level2_post_norm=False, |
| | level2_post_norm_block_ids=None, |
| | center_feature_scale=False, |
| | use_clip_projector=False, |
| | remove_center=False, |
| | num_classes=1000, |
| | drop_rate=0.0, |
| | drop_path_rate=0.0, |
| | drop_path_type='linear', |
| | act_layer='GELU', |
| | norm_layer='LN', |
| | cls_scale=1.5, |
| | with_cp=False, |
| | **kwargs, |
| | ): |
| | super().__init__(**kwargs) |
| |
|
| | |
| | self.core_op = core_op |
| | self.depths = depths |
| | self.groups = groups |
| | self.channels = channels |
| | self.dw_kernel_size = dw_kernel_size |
| | self.layer_scale = layer_scale |
| | self.offset_scale = offset_scale |
| | self.mlp_ratio = mlp_ratio |
| | self.post_norm = post_norm |
| | self.res_post_norm = res_post_norm |
| | self.level2_post_norm = level2_post_norm |
| | self.level2_post_norm_block_ids = level2_post_norm_block_ids |
| | self.center_feature_scale = center_feature_scale |
| | self.use_clip_projector = use_clip_projector |
| | self.remove_center = remove_center |
| | self.num_classes = num_classes |
| | self.drop_rate = drop_rate |
| | self.drop_path_rate = drop_path_rate |
| | self.drop_path_type = drop_path_type |
| | self.act_layer = act_layer |
| | self.norm_layer = norm_layer |
| | self.cls_scale = cls_scale |
| | self.with_cp = with_cp |
| |
|