1,简单介绍
在2.6.16内核中,netfilter的一个重大改进思想是把netfilter做成一个与协议无关的框架,表现为在内核源码树中单独建立了net/netfilter目录;而在此之前,netfilter是附着在各个协议目录之下的,如net/ipv4、net/ipv6等目录。现在各协议目录下虽然仍有netfilter相关的代码,但主要处理与各自协议相关的部分,而共同的部分都放在net/netfilter目录下,文件名也有所改变。目前这种分离还不够彻底,比如net/netfilter/nf_conntrack_core.c和net/ipv4/netfilter/ip_conntrack_core.c仍然很相似,让人觉得没有必要这样分开;但不少与协议无关的匹配和目标模块已经和协议分离,只存在于net/netfilter目录下,不再放在各协议目录下了。
在net/netfilter下的匹配和目标模块文件名都以“xt_”打头,如xt_comment.c、xt_policy.c等。
目标模块有:
xt_CLASSIFY.c
xt_NFQUEUE.c
xt_NOTRACK.c
为了和iptables兼容(因为iptables找模块文件前缀是按“ipt_”或“ip6t_”找的),这些文件中增加了一个新的宏定义:MODULE_ALIAS,来表示模块的别名。
如在xt_limit.c中就如下定义:
/* Alternative module names carrying the legacy "ipt_" and "ip6t_"
   prefixes, declared for compatibility with iptables' module lookup. */
MODULE_ALIAS("ipt_limit");
MODULE_ALIAS("ip6t_limit");
在include/linux/netfilter_ipv4/ip_tables.h中进行了以下定义:
/* The legacy IPv4 type names are plain aliases of the
   protocol-independent xt_* types. */
#define ipt_match xt_match
#define ipt_target xt_target
#define ipt_table xt_table
2,代码分析
以下是新匹配和目标模块的结构定义:
/* Describes one match extension. An instance is linked into the match
   list of its address family by xt_register_match(). */
struct xt_match
{
/* Link in the per-family match list. */
struct list_head list;
/* Name of the match; printed in xt_check_match() diagnostics. */
const char name[XT_FUNCTION_MAXNAMELEN-1];
/* Return true or false: return FALSE and set *hotdrop = 1 to
force immediate packet drop. */
/* Arguments changed since 2.6.9, as this must now handle
non-linear skb, using skb_header_pointer and
skb_ip_make_writable. */
int (*match)(const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct xt_match *match,
const void *matchinfo,
int offset,
unsigned int protoff,
int *hotdrop);
/* Called when user tries to insert an entry of this type. */
/* Should return true or false. */
int (*checkentry)(const char *tablename,
const void *ip,
const struct xt_match *match,
void *matchinfo,
unsigned int matchinfosize,
unsigned int hook_mask);
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_match *match, void *matchinfo,
unsigned int matchinfosize);
/* Called when userspace align differs from kernel space one */
int (*compat)(void *match, void **dstptr, int *size, int convert);
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
/* If non-NULL, the only table name this match is accepted in
   (enforced by xt_check_match()). */
char *table;
/* Size of the match data; xt_check_match() requires the supplied size
   to equal XT_ALIGN(matchsize). */
unsigned int matchsize;
/* If non-zero, bitmask of the hooks this match may be used from. */
unsigned int hooks;
/* If non-zero, the only protocol this match is valid for; an inverted
   protocol test is rejected as well. */
unsigned short proto;
/* Address family; selects the xt[] slot used by xt_register_match(). */
unsigned short family;
/* NOTE(review): presumably the extension's revision number; it is not
   used by the code visible here. */
u_int8_t revision;
};
/* Registration hooks for targets. */
/* Describes one target extension. An instance is linked into the target
   list of its address family by xt_register_target(). */
struct xt_target
{
/* Link in the per-family target list. */
struct list_head list;
/* Name of the target; printed in xt_check_target() diagnostics. */
const char name[XT_FUNCTION_MAXNAMELEN-1];
/* Returns verdict. Argument order changed since 2.6.9, as this
must now handle non-linear skbs, using skb_copy_bits and
skb_ip_make_writable. */
unsigned int (*target)(struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo,
void *userdata);
/* Called when user tries to insert an entry of this type:
hook_mask is a bitmask of hooks from which it can be
called. */
/* Should return true or false. */
int (*checkentry)(const char *tablename,
const void *entry,
const struct xt_target *target,
void *targinfo,
unsigned int targinfosize,
unsigned int hook_mask);
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_target *target, void *targinfo,
unsigned int targinfosize);
/* Called when userspace align differs from kernel space one */
int (*compat)(void *target, void **dstptr, int *size, int convert);
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
/* If non-NULL, the only table name this target is accepted in
   (enforced by xt_check_target()). */
char *table;
/* Size of the target data; xt_check_target() requires the supplied
   size to equal XT_ALIGN(targetsize). */
unsigned int targetsize;
/* If non-zero, bitmask of the hooks this target may be used from. */
unsigned int hooks;
/* If non-zero, the only protocol this target is valid for; an inverted
   protocol test is rejected as well. */
unsigned short proto;
/* Address family; selects the xt[] slot used by xt_register_target(). */
unsigned short family;
/* NOTE(review): presumably the extension's revision number; it is not
   used by the code visible here. */
u_int8_t revision;
};
/* Furniture shopping... */
/* One registered table. xt_register_table() links it into the table
   list of the family given by 'af'. */
struct xt_table
{
/* Link in the per-family table list. */
struct list_head list;
/* A unique name... */
char name[XT_TABLE_MAXNAMELEN];
/* What hooks you will enter on */
unsigned int valid_hooks;
/* Lock for the curtain */
/* Initialised with rwlock_init() in xt_register_table(). */
rwlock_t lock;
/* Man behind the curtain... */
/* Untyped pointer; xt_register_table() stores and reads back a
   struct xt_table_info * through it. */
void *private;
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
int af; /* address/protocol family */
};
/* The table itself */
/* Allocated by xt_alloc_table_info(), which also fills in 'size' and
   one 'entries' buffer per possible CPU. */
struct xt_table_info
{
/* Size per table */
/* Byte size of each per-CPU entries[] buffer. */
unsigned int size;
/* Number of entries: FIXME. --RR */
unsigned int number;
/* Initial number of entries. Needed for module usage count */
/* Copied from 'number' by xt_register_table(). */
unsigned int initial_entries;
/* Entry points and underflows */
unsigned int hook_entry[NF_IP_NUMHOOKS];
unsigned int underflow[NF_IP_NUMHOOKS];
/* ipt_entry tables: one per CPU */
char *entries[NR_CPUS];
};
/* Main structure: the state kept for one address/protocol family. */
struct xt_af {
/* Held by the xt_register_*() functions while they modify the lists. */
struct mutex mutex;
/* Registered match extensions (struct xt_match). */
struct list_head match;
/* Registered target extensions (struct xt_target). */
struct list_head target;
/* Registered tables (struct xt_table). */
struct list_head tables;
/* Initialised by xt_init() only when CONFIG_COMPAT is defined. */
struct mutex compat_mutex;
};
/* Management data: array of NPROTO struct xt_af slots, allocated in
   xt_init() and indexed by address/protocol family. */
static struct xt_af *xt;
/*
 * x_tables module initialisation.
 *
 * Allocates one struct xt_af per protocol family (NPROTO slots) and
 * initialises the mutexes and the empty target/match/table lists of
 * every slot.
 *
 * Returns 0 on success, or -ENOMEM if the array cannot be allocated.
 */
static int __init xt_init(void)
{
	int i;

	/* One slot per protocol family. */
	xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
	if (!xt)
		return -ENOMEM;

	for (i = 0; i < NPROTO; i++) {
		/* xt points to an array: each slot is set up through xt[i]. */
		mutex_init(&xt[i].mutex);
#ifdef CONFIG_COMPAT
		mutex_init(&xt[i].compat_mutex);
#endif
		/* Start with empty target, match and table lists. */
		INIT_LIST_HEAD(&xt[i].target);
		INIT_LIST_HEAD(&xt[i].match);
		INIT_LIST_HEAD(&xt[i].tables);
	}
	return 0;
}
目前2.6.16内核中支持了三类协议族,IPv4/IPv6/ARP,在各协议族中查找相应模块用的前缀为:
/* Per-family name prefix, indexed by address family. Used to build the
   "%s_tables: ..." diagnostics in xt_check_match() and
   xt_check_target(). Slots without an initialiser are NULL. */
static const char *xt_prefix[NPROTO] = {
[AF_INET] = "ip",
[AF_INET6] = "ip6",
[NF_ARP] = "arp",
};
对应的具体前缀分别为“ipt”、“ip6t”、“arpt”。
与老的2.4内核的struct ipt_match和struct ipt_target结构相比,主要区别是增加了compat函数,以及struct module *me之后的一系列成员(table、matchsize/targetsize、hooks、proto、family、revision),这些成员是和协议相关的。比如limit匹配,分别为IPv4和IPv6定义匹配结构时,只有family成员不同,一个是AF_INET,另一个是AF_INET6,其他都相同;而挂接时并不会有问题,因为这些模块分别挂接到不同协议族的链表上:
/* Registration hooks for targets. */
int
xt_register_target(struct xt_target *target)
{
int ret, af = target->family;
ret = mutex_lock_interruptible(&xt[af].mutex);
if (ret != 0)
return ret;
/* 添加 target*/
list_add(&target->list, &xt[af].target);
mutex_unlock(&xt[af].mutex);
return ret;
}
int
xt_register_match(struct xt_match *match)
{
int ret, af = match->family;
ret = mutex_lock_interruptible(&xt[af].mutex);
if (ret != 0)
return ret;
/* 添加match */
list_add(&match->list, &xt[af].match);
mutex_unlock(&xt[af].mutex);
return ret;
}
table的注册发生在各协议的netfilter代码中:
/*
 * Register an IPv4 table built from the initial ruleset in 'repl'.
 *
 * Allocates a table_info of repl->size bytes per CPU, copies the initial
 * entries into the current CPU's buffer, has translate_table() process
 * them, and hands the result to xt_register_table().
 *
 * Returns 0 on success, -ENOMEM if the table_info cannot be allocated,
 * or the error reported by translate_table() / xt_register_table().
 * On every failure path the freshly allocated table_info is freed.
 */
int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct xt_table_info *newinfo;
	static struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy on our node/cpu
	 * but dont care of preemption
	 */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	/* Keep the registration result: 'ret' is 0 at this point, so
	 * returning it unchanged would report success for a table that
	 * was never registered and whose info has just been freed. */
	ret = xt_register_table(table, &bootstrap, newinfo);
	if (ret != 0) {
		xt_free_table_info(newinfo);
		return ret;
	}

	return 0;
}
/*
 * Allocate a table_info whose per-CPU entry buffers are 'size' bytes each.
 *
 * One buffer is allocated for every possible CPU, on that CPU's node:
 * kmalloc_node() for sizes up to PAGE_SIZE, vmalloc_node() for larger
 * ones.
 *
 * Returns the new table_info, or NULL if the size check or any
 * allocation fails (buffers allocated so far are released through
 * xt_free_table_info()).
 */
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
	struct xt_table_info *info;
	int cpu;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	/* Refuse requests larger than physical memory. */
	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
		return NULL;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (info == NULL)
		return NULL;

	info->size = size;

	for_each_possible_cpu(cpu) {
		void *buf;

		if (size > PAGE_SIZE)
			buf = vmalloc_node(size, cpu_to_node(cpu));
		else
			buf = kmalloc_node(size, GFP_KERNEL,
					   cpu_to_node(cpu));

		info->entries[cpu] = buf;
		if (buf == NULL) {
			xt_free_table_info(info);
			return NULL;
		}
	}

	return info;
}
int xt_register_table(struct xt_table *table,
struct xt_table_info *bootstrap,
struct xt_table_info *newinfo)
{
int ret;
struct xt_table_info *private;
ret = mutex_lock_interruptible(&xt[table->af].mutex);
if (ret != 0)
return ret;
/* Don't autoload: we'd eat our tail... */
if (list_named_find(&xt[table->af].tables, table->name)) {
ret = -EEXIST;
goto unlock;
}
/* Simplifies replace_table code. */
table->private = bootstrap;
rwlock_init(&table->lock);
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
private = table->private;
duprintf("table->private->number = %u\n", private->number);
/* save number of initial entries */
private->initial_entries = private->number;
list_prepend(&xt[table->af].tables, table);
ret = 0;
unlock:
mutex_unlock(&xt[table->af].mutex);
return ret;
}
但在实际查找匹配和目标时,会进行名字、协议族、表名、挂接点、协议等的比较,如下面对匹配和目标的检查:
int xt_check_match(const struct xt_match *match, unsigned short family,
unsigned int size, const char *table, unsigned int hook_mask,
unsigned short proto, int inv_proto)
{
if (XT_ALIGN(match->matchsize) != size) {
printk("%s_tables: %s match: invalid size %Zu != %u\n",
xt_prefix[family], match->name,
XT_ALIGN(match->matchsize), size);
return -EINVAL;
}
if (match->table && strcmp(match->table, table)) {
printk("%s_tables: %s match: only valid in %s table, not %s\n",
xt_prefix[family], match->name, match->table, table);
return -EINVAL;
}
if (match->hooks && (hook_mask & ~match->hooks) != 0) {
printk("%s_tables: %s match: bad hook_mask %u\n",
xt_prefix[family], match->name, hook_mask);
return -EINVAL;
}
if (match->proto && (match->proto != proto || inv_proto)) {
printk("%s_tables: %s match: only valid for protocol %u\n",
xt_prefix[family], match->name, match->proto);
return -EINVAL;
}
return 0;
}
int xt_check_target(const struct xt_target *target, unsigned short family,
unsigned int size, const char *table, unsigned int hook_mask,
unsigned short proto, int inv_proto)
{
if (XT_ALIGN(target->targetsize) != size) {
printk("%s_tables: %s target: invalid size %Zu != %u\n",
xt_prefix[family], target->name,
XT_ALIGN(target->targetsize), size);
return -EINVAL;
}
if (target->table && strcmp(target->table, table)) {
printk("%s_tables: %s target: only valid in %s table, not %s\n",
xt_prefix[family], target->name, target->table, table);
return -EINVAL;
}
if (target->hooks && (hook_mask & ~target->hooks) != 0) {
printk("%s_tables: %s target: bad hook_mask %u\n",
xt_prefix[family], target->name, hook_mask);
return -EINVAL;
}
if (target->proto && (target->proto != proto || inv_proto)) {
printk("%s_tables: %s target: only valid for protocol %u\n",
xt_prefix[family], target->name, target->proto);
return -EINVAL;
}
return 0;
}
/* The code below handles IPsec policy matching. */
/* Registration record of the "policy" match for AF_INET. Each member is
   initialised exactly once; a repeated designator would silently
   override the earlier value. */
static struct xt_match policy_match = {
	.name		= "policy",
	.family		= AF_INET,
	.match		= match,
	.matchsize	= sizeof(struct xt_policy_info),
	.checkentry	= checkentry,
	.me		= THIS_MODULE,
};
/*
 * Module initialisation: register the policy match and then
 * policy6_match.
 *
 * If the second registration fails, the first one is undone so that
 * nothing stays registered. Returns 0 on success or the error from the
 * failing xt_register_match() call.
 */
static int __init init(void)
{
	int err;

	err = xt_register_match(&policy_match);
	if (err != 0)
		return err;

	err = xt_register_match(&policy6_match);
	if (err == 0)
		return 0;

	/* Roll back the first registration. */
	xt_unregister_match(&policy_match);
	return err;
}
/*
 * Match the security path of an inbound packet against the policy
 * elements in 'info'.
 *
 * The states recorded in skb->sp are visited from the last one
 * (index sp->len - 1) down to the first.
 *   - Non-strict mode: returns 1 as soon as any state matches
 *     info->pol[0], and 0 if none does.
 *   - Strict mode (XT_POLICY_MATCH_STRICT): info->len must equal
 *     sp->len and every visited state must match its policy element;
 *     returns 1 only then, 0 otherwise.
 *
 * Returns -1 if the packet carries no security path.
 */
static int
match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info,
		unsigned short family)
{
	const struct xt_policy_elem *e;
	struct sec_path *sp = skb->sp;
	int strict = info->flags & XT_POLICY_MATCH_STRICT;
	int i, pos;

	if (sp == NULL)
		return -1;
	if (strict && info->len != sp->len)
		return 0;

	for (i = sp->len - 1; i >= 0; i--) {
		/* NOTE(review): in strict mode pos runs from 0 down to
		 * -(sp->len - 1), so pol[] is indexed with non-positive
		 * values; confirm this is intended. */
		pos = strict ? i - sp->len + 1 : 0;
		if (pos >= info->len)
			return 0;
		e = &info->pol[pos];

		/* Check the i-th state of the security path against the
		 * policy element. The subscript is required: without it
		 * the loop index never selects a state.
		 * NOTE(review): assumes sec_path stores the states as
		 * xvec[]; trees that keep them as x[i].xvec need that
		 * spelling instead. */
		if (match_xfrm_state(sp->xvec[i], e, family)) {
			if (!strict)
				return 1;
		} else if (strict)
			return 0;
	}

	return strict ? 1 : 0;
}