From 27a7fa73eeb51d61d8c701a40910c42ed96bc890 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 13 Jun 2007 17:33:25 +0000 Subject: [PATCH] Add esfq to iproute2 and 2.4 kernel (#1891) SVN-Revision: 7612 --- .../patches/006-iproute2-tc_esfq.patch | 295 +++++++ .../generic-2.4/patches/620-tc_esfq.patch | 747 ++++++++++++++++++ 2 files changed, 1042 insertions(+) create mode 100644 package/iproute2/patches/006-iproute2-tc_esfq.patch create mode 100644 target/linux/generic-2.4/patches/620-tc_esfq.patch diff --git a/package/iproute2/patches/006-iproute2-tc_esfq.patch b/package/iproute2/patches/006-iproute2-tc_esfq.patch new file mode 100644 index 0000000000..d0347549dd --- /dev/null +++ b/package/iproute2/patches/006-iproute2-tc_esfq.patch @@ -0,0 +1,295 @@ +diff -urN --exclude=.svn iproute2-2.6.11-050330/include/linux/pkt_sched.h iproute2-2.6.11-050330/include/linux/pkt_sched.h +--- iproute2-2.6.11-050330/include/linux/pkt_sched.h 2007-05-04 22:21:48.000000000 -0400 ++++ iproute2-2.6.11-050330/include/linux/pkt_sched.h 2007-05-04 22:27:12.000000000 -0400 +@@ -174,8 +174,38 @@ + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. ++ * ++ * If you need to play with these values use esfq instead. 
+ */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ ++ ++ + /* RED section */ + + enum +@@ -551,8 +580,37 @@ + * + * The only reason for this is efficiency, it is possible + * to change these parameters in compile time. ++ * ++ * If you need to play with these values use esfq instead. + */ + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ ++ + /* RED section */ + + enum +diff -urN --exclude=.svn iproute2-2.6.11-050330/tc/Makefile iproute2-2.6.11-050330/tc/Makefile +--- iproute2-2.6.11-050330/tc/Makefile 2007-05-04 22:21:48.000000000 -0400 ++++ iproute2-2.6.11-050330/tc/Makefile 2007-05-04 22:27:37.000000000 -0400 +@@ -6,6 +6,7 @@ + TCMODULES := + TCMODULES += q_fifo.o + TCMODULES += q_sfq.o ++TCMODULES += q_esfq.o + TCMODULES += q_red.o + TCMODULES += q_prio.o + TCMODULES += q_tbf.o +diff 
-urN --exclude=.svn iproute2-2.6.11-050330/tc/q_esfq.c iproute2-2.6.11-050330/tc/q_esfq.c +--- iproute2-2.6.11-050330/tc/q_esfq.c 1969-12-31 19:00:00.000000000 -0500 ++++ iproute2-2.6.11-050330/tc/q_esfq.c 2007-05-04 22:37:54.000000000 -0400 +@@ -0,0 +1,200 @@ ++/* ++ * q_esfq.c ESFQ. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, ++ * ++ * Changes: Alexander Atanasov, ++ * Alexander Clouter, ++ * Corey Hickey, ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "utils.h" ++#include "tc_util.h" ++ ++static void explain(void) ++{ ++ fprintf(stderr, "Usage: ... esfq [ perturb SECS ] [ quantum BYTES ] [ depth FLOWS ]\n\t[ divisor HASHBITS ] [ limit PKTS ] [ hash HASHTYPE]\n"); ++ fprintf(stderr,"Where: \n"); ++ fprintf(stderr,"HASHTYPE := { classic | src | dst | ctorigdst | ctorigsrc | ctrepldst | ctreplsrc | ctnatchg }\n"); ++} ++ ++#define usage() return(-1) ++ ++static int esfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) ++{ ++ int ok=0; ++ struct tc_esfq_qopt opt; ++ ++ memset(&opt, 0, sizeof(opt)); ++ ++ opt.hash_kind= TCA_SFQ_HASH_CLASSIC; ++ ++ while (argc > 0) { ++ if (strcmp(*argv, "quantum") == 0) { ++ NEXT_ARG(); ++ if (get_size(&opt.quantum, *argv)) { ++ fprintf(stderr, "Illegal \"quantum\"\n"); ++ return -1; ++ } ++ ok++; ++ } else if (strcmp(*argv, "perturb") == 0) { ++ NEXT_ARG(); ++ if (get_integer(&opt.perturb_period, *argv, 0)) { ++ fprintf(stderr, "Illegal \"perturb\"\n"); ++ return -1; ++ } ++ ok++; ++ } else if (strcmp(*argv, "depth") == 0) { ++ NEXT_ARG(); ++ if (get_integer((int *) &opt.flows, *argv, 0)) { ++ fprintf(stderr, "Illegal \"depth\"\n"); ++ return -1; ++ } ++ ok++; ++ 
} else if (strcmp(*argv, "divisor") == 0) { ++ NEXT_ARG(); ++ if (get_integer((int *) &opt.divisor, *argv, 0)) { ++ fprintf(stderr, "Illegal \"divisor\"\n"); ++ return -1; ++ } ++ if(opt.divisor >= 14) { ++ fprintf(stderr, "Illegal \"divisor\": must be < 14\n"); ++ return -1; ++ } ++ opt.divisor=pow(2,opt.divisor); ++ ok++; ++ } else if (strcmp(*argv, "limit") == 0) { ++ NEXT_ARG(); ++ if (get_integer((int *) &opt.limit, *argv, 0)) { ++ fprintf(stderr, "Illegal \"limit\"\n"); ++ return -1; ++ } ++ ok++; ++ } else if (strcmp(*argv, "hash") == 0) { ++ NEXT_ARG(); ++ if(strcmp(*argv, "classic") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CLASSIC; ++ } else ++ if(strcmp(*argv, "dst") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_DST; ++ } else ++ if(strcmp(*argv, "src") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_SRC; ++ } else ++ if(strcmp(*argv, "ctorigsrc") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CTORIGSRC; ++ } else ++ if(strcmp(*argv, "ctorigdst") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CTORIGDST; ++ } else ++ if(strcmp(*argv, "ctreplsrc") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CTREPLSRC; ++ } else ++ if(strcmp(*argv, "ctrepldst") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CTREPLDST; ++ } else ++ if(strcmp(*argv, "ctnatchg") == 0) { ++ opt.hash_kind= TCA_SFQ_HASH_CTNATCHG; ++ } else { ++ fprintf(stderr, "Illegal \"hash\"\n"); ++ explain(); ++ return -1; ++ } ++ ok++; ++ } else if (strcmp(*argv, "help") == 0) { ++ explain(); ++ return -1; ++ } else { ++ fprintf(stderr, "What is \"%s\"?\n", *argv); ++ explain(); ++ return -1; ++ } ++ argc--; argv++; ++ } ++ ++ if (ok) ++ addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); ++ return 0; ++} ++ ++static int esfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) ++{ ++ struct tc_esfq_qopt *qopt; ++ SPRINT_BUF(b1); ++ ++ if (opt == NULL) ++ return 0; ++ ++ if (RTA_PAYLOAD(opt) < sizeof(*qopt)) ++ return -1; ++ qopt = RTA_DATA(opt); ++ fprintf(f, "quantum %s ", sprint_size(qopt->quantum, b1)); ++ if (show_details) { ++ fprintf(f, 
"limit %up flows %u/%u ", ++ qopt->limit, qopt->flows, qopt->divisor); ++ } ++ if (qopt->perturb_period) ++ fprintf(f, "perturb %dsec ", qopt->perturb_period); ++ ++ fprintf(f,"hash: "); ++ switch(qopt->hash_kind) ++ { ++ case TCA_SFQ_HASH_CLASSIC: ++ fprintf(f,"classic"); ++ break; ++ case TCA_SFQ_HASH_DST: ++ fprintf(f,"dst"); ++ break; ++ case TCA_SFQ_HASH_SRC: ++ fprintf(f,"src"); ++ break; ++ case TCA_SFQ_HASH_CTORIGSRC: ++ fprintf(f,"ctorigsrc"); ++ break; ++ case TCA_SFQ_HASH_CTORIGDST: ++ fprintf(f,"ctorigdst"); ++ break; ++ case TCA_SFQ_HASH_CTREPLSRC: ++ fprintf(f,"ctreplsrc"); ++ break; ++ case TCA_SFQ_HASH_CTREPLDST: ++ fprintf(f,"ctrepldst"); ++ break; ++ case TCA_SFQ_HASH_CTNATCHG: ++ fprintf(f,"ctnatchg"); ++ break; ++ default: ++ fprintf(f,"Unknown"); ++ } ++ return 0; ++} ++ ++static int esfq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) ++{ ++ return 0; ++} ++ ++ ++struct qdisc_util esfq_qdisc_util = { ++ .id = "esfq", ++ .parse_qopt = esfq_parse_opt, ++ .print_qopt = esfq_print_opt, ++ .print_xstats = esfq_print_xstats, ++}; diff --git a/target/linux/generic-2.4/patches/620-tc_esfq.patch b/target/linux/generic-2.4/patches/620-tc_esfq.patch new file mode 100644 index 0000000000..d342042c4c --- /dev/null +++ b/target/linux/generic-2.4/patches/620-tc_esfq.patch @@ -0,0 +1,747 @@ +diff -urN target.old/linux/generic-2.4/patches/232-esfq_kmod.patch target/linux/generic-2.4/patches/232-esfq_kmod.patch +--- target.old/linux/generic-2.4/patches/232-esfq_kmod.patch 1969-12-31 19:00:00.000000000 -0500 ++++ target/linux/generic-2.4/patches/232-esfq_kmod.patch 2007-06-08 02:35:06.000000000 -0400 +@@ -0,0 +1,743 @@ ++diff -urN linux-2.4.34/Documentation/Configure.help linux-2.4.34/Documentation/Configure.help ++--- linux-2.4.34/Documentation/Configure.help 2007-05-10 19:37:42.000000000 -0400 +++++ linux-2.4.34/Documentation/Configure.help 2007-05-10 19:49:49.000000000 -0400 ++@@ -11127,6 +11127,24 @@ ++ whenever you want). 
If you want to compile it as a module, say M ++ here and read <file:Documentation/modules.txt>. ++ +++ESFQ queue +++CONFIG_NET_SCH_ESFQ +++ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) +++ packet scheduling algorithm for some of your network devices or as a +++ leaf discipline for the CBQ scheduling algorithm (see the top of +++ <file:net/sched/sch_sfq.c> for details and references about the SFQ +++ algorithm). +++ +++ This is an enhanced SFQ version which allows you to control the +++ hardcoded values in the SFQ scheduler: queue depth, hash table size, +++ queues limit. Also adds control to the hash function used to identify +++ packet flows. Hash by src or dst ip and original sfq hash. +++ +++ This code is also available as a module called sch_esfq.o ( = code +++ which can be inserted in and removed from the running kernel +++ whenever you want). If you want to compile it as a module, say M +++ here and read <file:Documentation/modules.txt>. +++ ++ CSZ packet scheduler ++ CONFIG_NET_SCH_CSZ ++ Say Y here if you want to use the Clark-Shenker-Zhang (CSZ) packet ++diff -urN linux-2.4.34/include/linux/pkt_sched.h linux-2.4.34/include/linux/pkt_sched.h ++--- linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:38:19.000000000 -0400 +++++ linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:53:59.000000000 -0400 ++@@ -173,8 +173,36 @@ ++ * ++ * The only reason for this is efficiency, it is possible ++ * to change these parameters in compile time. +++ * +++ * If you need to play with these values use esfq instead. 
++ */ ++ +++/* ESFQ section */ +++ +++enum +++{ +++ /* traditional */ +++ TCA_SFQ_HASH_CLASSIC, +++ TCA_SFQ_HASH_DST, +++ TCA_SFQ_HASH_SRC, +++ /* conntrack */ +++ TCA_SFQ_HASH_CTORIGDST, +++ TCA_SFQ_HASH_CTORIGSRC, +++ TCA_SFQ_HASH_CTREPLDST, +++ TCA_SFQ_HASH_CTREPLSRC, +++ TCA_SFQ_HASH_CTNATCHG, +++}; +++ +++struct tc_esfq_qopt +++{ +++ unsigned quantum; /* Bytes per round allocated to flow */ +++ int perturb_period; /* Period of hash perturbation */ +++ __u32 limit; /* Maximal packets in queue */ +++ unsigned divisor; /* Hash divisor */ +++ unsigned flows; /* Maximal number of flows */ +++ unsigned hash_kind; /* Hash function to use for flow identification */ +++}; +++ ++ /* RED section */ ++ ++ enum ++diff -urN linux-2.4.34/net/sched/Config.in linux-2.4.34/net/sched/Config.in ++--- linux-2.4.34/net/sched/Config.in 2007-05-10 19:38:31.000000000 -0400 +++++ linux-2.4.34/net/sched/Config.in 2007-05-10 19:54:45.000000000 -0400 ++@@ -12,6 +12,7 @@ ++ tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO ++ tristate ' RED queue' CONFIG_NET_SCH_RED ++ tristate ' SFQ queue' CONFIG_NET_SCH_SFQ +++tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ ++ tristate ' TEQL queue' CONFIG_NET_SCH_TEQL ++ tristate ' TBF queue' CONFIG_NET_SCH_TBF ++ tristate ' GRED queue' CONFIG_NET_SCH_GRED ++diff -urN linux-2.4.34/net/sched/Makefile linux-2.4.34/net/sched/Makefile ++--- linux-2.4.34/net/sched/Makefile 2007-05-10 19:38:31.000000000 -0400 +++++ linux-2.4.34/net/sched/Makefile 2007-05-10 19:55:13.000000000 -0400 ++@@ -19,6 +19,7 @@ ++ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o ++ obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o ++ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o +++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o ++ obj-$(CONFIG_NET_SCH_RED) += sch_red.o ++ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o ++ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o ++diff -urN linux-2.4.34/net/sched/sch_esfq.c linux-2.4.34/net/sched/sch_esfq.c ++--- linux-2.4.34/net/sched/sch_esfq.c 1969-12-31 19:00:00.000000000 -0500 
+++++ linux-2.4.34/net/sched/sch_esfq.c 2007-05-10 19:57:15.000000000 -0400 ++@@ -0,0 +1,649 @@ +++/* +++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. +++ * +++ * This program is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU General Public License +++ * as published by the Free Software Foundation; either version +++ * 2 of the License, or (at your option) any later version. +++ * +++ * Authors: Alexey Kuznetsov, +++ * +++ * Changes: Alexander Atanasov, +++ * Added dynamic depth,limit,divisor,hash_kind options. +++ * Added dst and src hashes. +++ * +++ * Alexander Clouter, +++ * Ported ESFQ to Linux 2.6. +++ * +++ * Corey Hickey, +++ * Maintenance of the Linux 2.6 port. +++ * Added fwmark hash (thanks to Robert Kurjata). +++ * Added usage of jhash. +++ * +++ */ +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#define IPPROTO_DCCP 33 +++#define qdisc_priv(q) ((void *)(q->data)) +++ +++#ifdef CONFIG_IP_NF_CONNTRACK +++/* #include */ +++#include +++#endif +++ +++/* Stochastic Fairness Queuing algorithm. +++ For more comments look at sch_sfq.c. +++ The difference is that you can change limit, depth, +++ hash table size and choose alternate hash types. 
+++ +++ classic: same as in sch_sfq.c +++ dst: destination IP address +++ src: source IP address +++ ctorigdst: original destination IP address +++ ctorigsrc: original source IP address +++ ctrepldst: reply destination IP address +++ ctreplsrc: reply source IP +++ ctnatchg: use the address which changed via nat +++ +++*/ +++ +++ +++/* This type should contain at least SFQ_DEPTH*2 values */ +++typedef unsigned int esfq_index; +++ +++struct esfq_head +++{ +++ esfq_index next; +++ esfq_index prev; +++}; +++ +++struct esfq_sched_data +++{ +++/* Parameters */ +++ int perturb_period; +++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ +++ int limit; +++ unsigned depth; +++ unsigned hash_divisor; +++ unsigned hash_kind; +++/* Variables */ +++ struct timer_list perturb_timer; +++ int perturbation; +++ esfq_index tail; /* Index of current slot in round */ +++ esfq_index max_depth; /* Maximal depth */ +++ +++ esfq_index *ht; /* Hash table */ +++ esfq_index *next; /* Active slots link */ +++ short *allot; /* Current allotment per slot */ +++ unsigned short *hash; /* Hash value indexed by slots */ +++ struct sk_buff_head *qs; /* Slot queue */ +++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ +++ unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */ +++ unsigned dyn_max; /* maximum value seen */ +++ unsigned dyn_range; /* saved range */ +++}; +++ +++/* This contains the info we will hash. 
*/ +++struct esfq_packet_info +++{ +++ u32 proto; /* protocol or port */ +++ u32 src; /* source from packet header */ +++ u32 dst; /* destination from packet header */ +++ u32 ctorigsrc; /* original source from conntrack */ +++ u32 ctorigdst; /* original destination from conntrack */ +++ u32 ctreplsrc; /* reply source from conntrack */ +++ u32 ctrepldst; /* reply destination from conntrack */ +++}; +++ +++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) +++{ +++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); +++} +++ +++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) +++{ +++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); +++} +++ +++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) +++{ +++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); +++} +++ +++ +++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) +++{ +++ struct esfq_packet_info info; +++#ifdef CONFIG_IP_NF_CONNTRACK +++ enum ip_conntrack_info ctinfo; +++ struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo); +++#endif +++ +++ switch (skb->protocol) { +++ case __constant_htons(ETH_P_IP): +++ { +++ struct iphdr *iph = skb->nh.iph; +++ info.dst = iph->daddr; +++ info.src = iph->saddr; +++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && +++ (iph->protocol == IPPROTO_TCP || +++ iph->protocol == IPPROTO_UDP || +++ iph->protocol == IPPROTO_SCTP || +++ iph->protocol == IPPROTO_DCCP || +++ iph->protocol == IPPROTO_ESP)) +++ info.proto = *(((u32*)iph) + iph->ihl); +++ else +++ info.proto = iph->protocol; +++ break; +++ } +++ default: +++ info.dst = (u32)(unsigned long)skb->dst; +++ info.src = (u32)(unsigned long)skb->sk; +++ info.proto = skb->protocol; +++ } +++ +++#ifdef CONFIG_IP_NF_CONNTRACK +++ /* defaults if there is no conntrack info */ +++ info.ctorigsrc = info.src; +++ info.ctorigdst = info.dst; +++ info.ctreplsrc = 
info.dst; +++ info.ctrepldst = info.src; +++ /* collect conntrack info */ +++ IP_NF_ASSERT(ct); +++ if (ct) { +++ if (skb->protocol == __constant_htons(ETH_P_IP)) { +++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; +++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; +++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; +++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; +++ } +++ } +++#endif +++ +++ switch(q->hash_kind) +++ { +++ case TCA_SFQ_HASH_CLASSIC: +++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); +++ case TCA_SFQ_HASH_DST: +++ return esfq_jhash_1word(q, info.dst); +++ case TCA_SFQ_HASH_SRC: +++ return esfq_jhash_1word(q, info.src); +++#ifdef CONFIG_IP_NF_CONNTRACK +++ case TCA_SFQ_HASH_CTORIGDST: +++ return esfq_jhash_1word(q, info.ctorigdst); +++ case TCA_SFQ_HASH_CTORIGSRC: +++ return esfq_jhash_1word(q, info.ctorigsrc); +++ case TCA_SFQ_HASH_CTREPLDST: +++ return esfq_jhash_1word(q, info.ctrepldst); +++ case TCA_SFQ_HASH_CTREPLSRC: +++ return esfq_jhash_1word(q, info.ctreplsrc); +++ case TCA_SFQ_HASH_CTNATCHG: +++ { +++ if (info.ctorigdst == info.ctreplsrc) +++ return esfq_jhash_1word(q, info.ctorigsrc); +++ else +++ return esfq_jhash_1word(q, info.ctreplsrc); +++ } +++#endif +++ default: +++ if (net_ratelimit()) +++ printk(KERN_WARNING "ESFQ: Unknown hash method. 
Falling back to classic.\n"); +++ } +++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); +++} +++ +++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) +++{ +++ esfq_index p, n; +++ int d = q->qs[x].qlen + q->depth; +++ +++ p = d; +++ n = q->dep[d].next; +++ q->dep[x].next = n; +++ q->dep[x].prev = p; +++ q->dep[p].next = q->dep[n].prev = x; +++} +++ +++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) +++{ +++ esfq_index p, n; +++ +++ n = q->dep[x].next; +++ p = q->dep[x].prev; +++ q->dep[p].next = n; +++ q->dep[n].prev = p; +++ +++ if (n == p && q->max_depth == q->qs[x].qlen + 1) +++ q->max_depth--; +++ +++ esfq_link(q, x); +++} +++ +++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) +++{ +++ esfq_index p, n; +++ int d; +++ +++ n = q->dep[x].next; +++ p = q->dep[x].prev; +++ q->dep[p].next = n; +++ q->dep[n].prev = p; +++ d = q->qs[x].qlen; +++ if (q->max_depth < d) +++ q->max_depth = d; +++ +++ esfq_link(q, x); +++} +++ +++static unsigned int esfq_drop(struct Qdisc *sch) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ esfq_index d = q->max_depth; +++ struct sk_buff *skb; +++ unsigned int len; +++ +++ /* Queue is full! Find the longest slot and +++ drop a packet from it */ +++ +++ if (d > 1) { +++ esfq_index x = q->dep[d+q->depth].next; +++ skb = q->qs[x].prev; +++ len = skb->len; +++ __skb_unlink(skb, &q->qs[x]); +++ kfree_skb(skb); +++ esfq_dec(q, x); +++ sch->q.qlen--; +++ sch->stats.drops++; +++ sch->stats.backlog -= len; +++ return len; +++ } +++ +++ if (d == 1) { +++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ +++ d = q->next[q->tail]; +++ q->next[q->tail] = q->next[d]; +++ q->allot[q->next[d]] += q->quantum; +++ skb = q->qs[d].prev; +++ len = skb->len; +++ __skb_unlink(skb, &q->qs[d]); +++ kfree_skb(skb); +++ esfq_dec(q, d); +++ sch->q.qlen--; +++ q->ht[q->hash[d]] = q->depth; +++ sch->stats.drops++; +++ sch->stats.backlog -= len; +++ return len; +++ } +++ +++ return 0; +++} +++ +++static int +++esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ unsigned hash = esfq_hash(q, skb); +++ unsigned depth = q->depth; +++ esfq_index x; +++ +++ x = q->ht[hash]; +++ if (x == depth) { +++ q->ht[hash] = x = q->dep[depth].next; +++ q->hash[x] = hash; +++ } +++ sch->stats.backlog += skb->len; +++ __skb_queue_tail(&q->qs[x], skb); +++ esfq_inc(q, x); +++ if (q->qs[x].qlen == 1) { /* The flow is new */ +++ if (q->tail == depth) { /* It is the first flow */ +++ q->tail = x; +++ q->next[x] = x; +++ q->allot[x] = q->quantum; +++ } else { +++ q->next[x] = q->next[q->tail]; +++ q->next[q->tail] = x; +++ q->tail = x; +++ } +++ } +++ if (++sch->q.qlen < q->limit-1) { +++ sch->stats.bytes += skb->len; +++ sch->stats.packets++; +++ return 0; +++ } +++ +++ esfq_drop(sch); +++ return NET_XMIT_CN; +++} +++ +++static int +++esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ unsigned hash = esfq_hash(q, skb); +++ unsigned depth = q->depth; +++ esfq_index x; +++ +++ x = q->ht[hash]; +++ if (x == depth) { +++ q->ht[hash] = x = q->dep[depth].next; +++ q->hash[x] = hash; +++ } +++ sch->stats.backlog += skb->len; +++ __skb_queue_head(&q->qs[x], skb); +++ esfq_inc(q, x); +++ if (q->qs[x].qlen == 1) { /* The flow is new */ +++ if (q->tail == depth) { /* It is the first flow */ +++ q->tail = x; +++ q->next[x] = x; +++ q->allot[x] = q->quantum; +++ } else { +++ q->next[x] = q->next[q->tail]; +++ q->next[q->tail] = x; +++ q->tail = x; +++ } +++ } +++ if (++sch->q.qlen < q->limit - 1) { 
+++ return 0; +++ } +++ +++ sch->stats.drops++; +++ esfq_drop(sch); +++ return NET_XMIT_CN; +++} +++ +++ +++ +++ +++static struct sk_buff * +++esfq_dequeue(struct Qdisc* sch) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ struct sk_buff *skb; +++ unsigned depth = q->depth; +++ esfq_index a, old_a; +++ +++ /* No active slots */ +++ if (q->tail == depth) +++ return NULL; +++ +++ a = old_a = q->next[q->tail]; +++ +++ /* Grab packet */ +++ skb = __skb_dequeue(&q->qs[a]); +++ esfq_dec(q, a); +++ sch->q.qlen--; +++ sch->stats.backlog -= skb->len; +++ +++ /* Is the slot empty? */ +++ if (q->qs[a].qlen == 0) { +++ q->ht[q->hash[a]] = depth; +++ a = q->next[a]; +++ if (a == old_a) { +++ q->tail = depth; +++ return skb; +++ } +++ q->next[q->tail] = a; +++ q->allot[a] += q->quantum; +++ } else if ((q->allot[a] -= skb->len) <= 0) { +++ q->tail = a; +++ a = q->next[a]; +++ q->allot[a] += q->quantum; +++ } +++ +++ return skb; +++} +++ +++static void +++esfq_reset(struct Qdisc* sch) +++{ +++ struct sk_buff *skb; +++ +++ while ((skb = esfq_dequeue(sch)) != NULL) +++ kfree_skb(skb); +++} +++ +++static void esfq_perturbation(unsigned long arg) +++{ +++ struct Qdisc *sch = (struct Qdisc*)arg; +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ +++ q->perturbation = net_random()&0x1F; +++ +++ if (q->perturb_period) { +++ q->perturb_timer.expires = jiffies + q->perturb_period; +++ add_timer(&q->perturb_timer); +++ } +++} +++ +++static int esfq_change(struct Qdisc *sch, struct rtattr *opt) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ struct tc_esfq_qopt *ctl = RTA_DATA(opt); +++ int old_perturb = q->perturb_period; +++ +++ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) +++ return -EINVAL; +++ +++ sch_tree_lock(sch); +++ q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); +++ q->perturb_period = ctl->perturb_period*HZ; +++// q->hash_divisor = ctl->divisor; +++// q->tail = q->limit = q->depth = ctl->flows; +++ +++ if (ctl->limit) +++ q->limit = min_t(u32, ctl->limit, q->depth); +++ +++ if (ctl->hash_kind) { +++ q->hash_kind = ctl->hash_kind; +++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) +++ q->perturb_period = 0; +++ } +++ +++ // is sch_tree_lock enough to do this ? +++ while (sch->q.qlen >= q->limit-1) +++ esfq_drop(sch); +++ +++ if (old_perturb) +++ del_timer(&q->perturb_timer); +++ if (q->perturb_period) { +++ q->perturb_timer.expires = jiffies + q->perturb_period; +++ add_timer(&q->perturb_timer); +++ } else { +++ q->perturbation = 0; +++ } +++ sch_tree_unlock(sch); +++ return 0; +++} +++ +++static int esfq_init(struct Qdisc *sch, struct rtattr *opt) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ struct tc_esfq_qopt *ctl; +++ esfq_index p = ~0U/2; +++ int i; +++ +++ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl))) +++ return -EINVAL; +++ +++ init_timer(&q->perturb_timer); +++ q->perturb_timer.data = (unsigned long)sch; +++ q->perturb_timer.function = esfq_perturbation; +++ q->perturbation = 0; +++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; +++ q->max_depth = 0; +++ q->dyn_min = ~0U; /* maximum value for this type */ +++ q->dyn_max = 0; /* dyn_min/dyn_max will be set properly upon first packet */ +++ if (opt == NULL) { +++ q->quantum = psched_mtu(sch->dev); +++ q->perturb_period = 0; +++ q->hash_divisor = 1024; +++ q->tail = q->limit = q->depth = 128; +++ +++ } else { +++ ctl = RTA_DATA(opt); +++ q->quantum = ctl->quantum ? : psched_mtu(sch->dev); +++ q->perturb_period = ctl->perturb_period*HZ; +++ q->hash_divisor = ctl->divisor ? : 1024; +++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; +++ +++ if ( q->depth > p - 1 ) +++ return -EINVAL; +++ +++ if (ctl->limit) +++ q->limit = min_t(u32, ctl->limit, q->depth); +++ +++ if (ctl->hash_kind) { +++ q->hash_kind = ctl->hash_kind; +++ } +++ +++ if (q->perturb_period) { +++ q->perturb_timer.expires = jiffies + q->perturb_period; +++ add_timer(&q->perturb_timer); +++ } +++ } +++ +++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); +++ if (!q->ht) +++ goto err_case; +++ +++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); +++ if (!q->dep) +++ goto err_case; +++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); +++ if (!q->next) +++ goto err_case; +++ +++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); +++ if (!q->allot) +++ goto err_case; +++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); +++ if (!q->hash) +++ goto err_case; +++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); +++ if (!q->qs) +++ goto err_case; +++ +++ for (i=0; i< q->hash_divisor; i++) +++ q->ht[i] = q->depth; +++ for (i=0; i<q->depth; i++) { +++ skb_queue_head_init(&q->qs[i]); +++ q->dep[i+q->depth].next = i+q->depth; +++ q->dep[i+q->depth].prev = i+q->depth; +++ } +++ +++ for (i=0; i<q->depth; i++) +++ esfq_link(q, i); +++ return 0; +++err_case: +++ del_timer(&q->perturb_timer); +++ if (q->ht) +++ kfree(q->ht); +++ if (q->dep) +++ kfree(q->dep); +++ if (q->next) +++ kfree(q->next); +++ if (q->allot) +++ kfree(q->allot); +++ if (q->hash) +++ kfree(q->hash); +++ if (q->qs) +++ kfree(q->qs); +++ return -ENOBUFS; +++} +++ +++static void esfq_destroy(struct Qdisc *sch) +++{ +++ struct esfq_sched_data *q = qdisc_priv(sch); +++ del_timer(&q->perturb_timer); +++ if(q->ht) +++ kfree(q->ht); +++ if(q->dep) +++ kfree(q->dep); +++ if(q->next) +++ kfree(q->next); +++ if(q->allot) +++ kfree(q->allot); +++ if(q->hash) +++ kfree(q->hash); +++ if(q->qs) +++ kfree(q->qs); +++} +++ +++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) +++{ +++ struct 
esfq_sched_data *q = qdisc_priv(sch); +++ unsigned char *b = skb->tail; +++ struct tc_esfq_qopt opt; +++ +++ opt.quantum = q->quantum; +++ opt.perturb_period = q->perturb_period/HZ; +++ +++ opt.limit = q->limit; +++ opt.divisor = q->hash_divisor; +++ opt.flows = q->depth; +++ opt.hash_kind = q->hash_kind; +++ +++ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); +++ +++ return skb->len; +++ +++rtattr_failure: +++ skb_trim(skb, b - skb->data); +++ return -1; +++} +++ +++static struct Qdisc_ops esfq_qdisc_ops = +++{ +++ .next = NULL, +++ .cl_ops = NULL, +++ .id = "esfq", +++ .priv_size = sizeof(struct esfq_sched_data), +++ .enqueue = esfq_enqueue, +++ .dequeue = esfq_dequeue, +++ .requeue = esfq_requeue, +++ .drop = esfq_drop, +++ .init = esfq_init, +++ .reset = esfq_reset, +++ .destroy = esfq_destroy, +++ .change = NULL, /* esfq_change - needs more work */ +++ .dump = esfq_dump, +++}; +++ +++static int __init esfq_module_init(void) +++{ +++ return register_qdisc(&esfq_qdisc_ops); +++} +++static void __exit esfq_module_exit(void) +++{ +++ unregister_qdisc(&esfq_qdisc_ops); +++} +++module_init(esfq_module_init) +++module_exit(esfq_module_exit) +++MODULE_LICENSE("GPL");