ss Source Code Debugging & Analysis

Source Code Debugging

ss lives in the iproute2 package, so its source code can be downloaded from the iproute2 project. Setting up source-level debugging works the same way as described in the article on netstat source code debugging below.

Create a CMakeLists.txt file in the project root with the following content:

cmake_minimum_required(VERSION 3.13)
project(test C)
 
set(BUILD_DIR .)
 
#add_executable()
add_custom_target(ss command -c ${BUILD_DIR})

Also change CCOPTS = -O2 on line 45 of the Makefile to CCOPTS = -O0 -g3.

Configure the Target in CLion:

clion-settings.png

Netid  State      Recv-Q Send-Q Local Address:Port             Peer Address:Port
tcp    ESTAB      0      0      127.0.0.1:57354                127.0.0.1:socks               
tcp    ESTAB      0      0      127.0.0.1:37350                127.0.0.1:socks               
tcp    ESTAB      0      0      172.16.40.154:43450                45.8.223.61:17250               
tcp    CLOSE-WAIT 1      0      127.0.0.1:57398                127.0.0.1:socks               
tcp    ESTAB      0      0      127.0.0.1:57062                127.0.0.1:socks

The output is the same as running the ss command directly. Next, let's analyze the execution flow of the ss program.

main

The main function parses the command-line options and decides, based on them, which functions to execute.

int main(int argc, char *argv[])
{
    int saw_states = 0;
    int saw_query = 0;
    int do_summary = 0;
    const char *dump_tcpdiag = NULL;
    FILE *filter_fp = NULL;
    int ch;
    int state_filter = 0;
    int addrp_width, screen_width = 80;
 
    while ((ch = getopt_long(argc, argv,
                 "dhaletuwxnro460spbEf:miA:D:F:vVzZN:KHS",
                 long_opts, NULL)) != EOF) {
        switch (ch) {
        case 'n':
            resolve_services = 0;
            break;
        ......
        }
        .....
    }

By default, execution reaches the following code:

if (do_default) {
    state_filter = state_filter ? state_filter : SS_CONN;
    filter_default_dbs(&current_filter);
}

The program calls filter_default_dbs() to set the default filter conditions.

filter_default_dbs

static void filter_default_dbs(struct filter *f) {
    filter_db_set(f, UDP_DB);
    filter_db_set(f, DCCP_DB);
    filter_db_set(f, TCP_DB);
    filter_db_set(f, RAW_DB);
    filter_db_set(f, UNIX_ST_DB);
    filter_db_set(f, UNIX_DG_DB);
    filter_db_set(f, UNIX_SQ_DB);
    filter_db_set(f, PACKET_R_DB);
    filter_db_set(f, PACKET_DG_DB);
    filter_db_set(f, NETLINK_DB);
    filter_db_set(f, SCTP_DB);
}

filter_default_dbs is straightforward: it sets the filter conditions used in the default case.
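
filter_db_set() itself is not shown above; conceptually it only records which socket tables (dbs) the default query should cover. Below is a minimal, self-contained sketch of that bitmask idea; the enum values and the filter struct here are simplified stand-ins for illustration, not the real definitions from ss.c.

/* Minimal sketch of the bitmask idea behind filter_db_set().
 * The identifiers below are simplified assumptions, not ss.c's code. */
#include <stdio.h>

enum { UDP_DB, TCP_DB, UNIX_ST_DB, MAX_DB };    /* hypothetical subset */

struct filter { unsigned int dbs; };            /* bit i set => db i selected */

static void filter_db_set(struct filter *f, int db)
{
    f->dbs |= 1u << db;                         /* remember this db is wanted */
}

static int filter_db_get(const struct filter *f, int db)
{
    return (f->dbs >> db) & 1u;
}

int main(void)
{
    struct filter f = { 0 };

    filter_db_set(&f, TCP_DB);
    filter_db_set(&f, UNIX_ST_DB);
    printf("tcp selected: %d, udp selected: %d\n",
           filter_db_get(&f, TCP_DB), filter_db_get(&f, UDP_DB));
    return 0;
}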

The program then reaches unix_show(&current_filter);.

unix_show

The function code is as follows:

static int unix_show(struct filter *f)
{
    FILE *fp;
    char buf[256];
    char name[128];
    int  newformat = 0;
    int  cnt;
    struct sockstat *list = NULL;
    const int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT,
                       SS_ESTABLISHED, SS_CLOSING };
 
    if (!filter_af_get(f, AF_UNIX))
        return 0;
 
    if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT")
        && unix_show_netlink(f) == 0)
        return 0;
 
    if ((fp = net_unix_open()) == NULL)
        return -1;
    if (!fgets(buf, sizeof(buf), fp)) {
        fclose(fp);
        return -1;
    }
 
    if (memcmp(buf, "Peer", 4) == 0)
        newformat = 1;
    cnt = 0;
 
    while (fgets(buf, sizeof(buf), fp)) {
        struct sockstat *u, **insp;
        int flags;
 
        if (!(u = calloc(1, sizeof(*u))))
            break;
 
        if (sscanf(buf, "%x: %x %x %x %x %x %d %s",
               &u->rport, &u->rq, &u->wq, &flags, &u->type,
               &u->state, &u->ino, name) < 8)
            name[0] = 0;
 
        u->lport = u->ino;
        u->local.family = u->remote.family = AF_UNIX;
 
        if (flags & (1 << 16)) {
            u->state = SS_LISTEN;
        } else if (u->state > 0 &&
               u->state <= ARRAY_SIZE(unix_state_map)) {
            u->state = unix_state_map[u->state-1];
            if (u->type == SOCK_DGRAM && u->state == SS_CLOSE && u->rport)
                u->state = SS_ESTABLISHED;
        }
        if (unix_type_skip(u, f) ||
            !(f->states & (1 << u->state))) {
            free(u);
            continue;
        }
 
        if (!newformat) {
            u->rport = 0;
            u->rq = 0;
            u->wq = 0;
        }
 
        if (name[0]) {
            u->name = strdup(name);
            if (!u->name) {
                free(u);
                break;
            }
        }
 
        if (u->rport) {
            struct sockstat *p;
 
            for (p = list; p; p = p->next) {
                if (u->rport == p->lport)
                    break;
            }
            if (!p)
                u->peer_name = "?";
            else
                u->peer_name = p->name ? : "*";
        }
 
        if (f->f) {
            struct sockstat st = {
                .local.family = AF_UNIX,
                .remote.family = AF_UNIX,
            };
 
            memcpy(st.local.data, &u->name, sizeof(u->name));
            if (strcmp(u->peer_name, "*"))
                memcpy(st.remote.data, &u->peer_name,
                       sizeof(u->peer_name));
            if (run_ssfilter(f->f, &st) == 0) {
                free(u->name);
                free(u);
                continue;
            }
        }
 
        insp = &list;
        while (*insp) {
            if (u->type < (*insp)->type ||
                (u->type == (*insp)->type &&
                 u->ino < (*insp)->ino))
                break;
            insp = &(*insp)->next;
        }
        u->next = *insp;
        *insp = u;
 
        if (++cnt > MAX_UNIX_REMEMBER) {
            while (list) {
                unix_stats_print(list, f);
                printf("\n");
 
                unix_list_drop_first(&list);
            }
            cnt = 0;
        }
    }
    fclose(fp);
    while (list) {
        unix_stats_print(list, f);
        printf("\n");
 
        unix_list_drop_first(&list);
    }
 
    return 0;
}

This is the core function that parses the network data. Since there is quite a lot of code, let's go through it step by step.

unix_show_netlink

if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT")
       && unix_show_netlink(f) == 0)
       return 0;
  • getenv checks whether the PROC_NET_UNIX or PROC_ROOT environment variable is set
  • unix_show_netlink(f) creates the netlink request

Stepping into unix_show_netlink():

static int unix_show_netlink(struct filter *f)
{
    DIAG_REQUEST(req, struct unix_diag_req r);
 
    req.r.sdiag_family = AF_UNIX;
    req.r.udiag_states = f->states;
    req.r.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER | UDIAG_SHOW_RQLEN;
    if (show_mem)
        req.r.udiag_show |= UDIAG_SHOW_MEMINFO;
 
    return handle_netlink_request(f, &req.nlh, sizeof(req), unix_show_sock);
}

f is a filter used to hold some simple filtering conditions.

req.r.sdiag_family = AF_UNIX;
req.r.udiag_states = f->states;
req.r.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER | UDIAG_SHOW_RQLEN;

These lines fill in the body of the NETLINK_SOCK_DIAG request; afterwards handle_netlink_request(f, &req.nlh, sizeof(req), unix_show_sock); is called.
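
For reference, the request body filled in by these assignments is the kernel's struct unix_diag_req from the UAPI header <linux/unix_diag.h> (the __u8/__u16/__u32 types come from <linux/types.h>); the DIAG_REQUEST() macro in ss.c packs this body together with the struct nlmsghdr that req.nlh refers to.

/* struct unix_diag_req, as defined in <linux/unix_diag.h> */
struct unix_diag_req {
    __u8    sdiag_family;
    __u8    sdiag_protocol;
    __u16   pad;
    __u32   udiag_states;   /* bitmask of socket states to report */
    __u32   udiag_ino;
    __u32   udiag_show;     /* UDIAG_SHOW_* flags set above */
    __u32   udiag_cookie[2];
};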

handle_netlink_request

Stepping into the implementation of handle_netlink_request:

static int handle_netlink_request(struct filter *f, struct nlmsghdr *req,
        size_t size, rtnl_filter_t show_one_sock)
{
    int ret = -1;
    struct rtnl_handle rth;
 
    if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
        return -1;
 
    rth.dump = MAGIC_SEQ;
 
    if (rtnl_send(&rth, req, size) < 0)
        goto Exit;
 
    if (rtnl_dump_filter(&rth, show_one_sock, f))
        goto Exit;
 
    ret = 0;
Exit:
    rtnl_close(&rth);
    return ret;
}
  • rtnl_send(&rth, req, size) sends the NETLINK_SOCK_DIAG request message.
  • rtnl_dump_filter(&rth, show_one_sock, f) receives the netlink reply messages and invokes the show_one_sock() callback for each one (see the sketch below).
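
To make the exchange concrete, here is a small standalone sketch (not ss's code) that does the same thing by hand: open a NETLINK_SOCK_DIAG socket, send one AF_UNIX dump request, and read back the first chunk of replies. Buffer looping and message parsing are deliberately omitted.

/* Standalone sketch of the raw netlink exchange wrapped by rtnl_send()
 * and rtnl_dump_filter(). Compile and run on Linux. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/unix_diag.h>

int main(void)
{
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
    if (fd < 0) { perror("socket"); return 1; }

    struct {
        struct nlmsghdr nlh;
        struct unix_diag_req r;
    } req = {
        .nlh = {
            .nlmsg_len   = sizeof(req),
            .nlmsg_type  = SOCK_DIAG_BY_FAMILY,
            .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
        },
        .r = {
            .sdiag_family = AF_UNIX,
            .udiag_states = 0xffffffffU,            /* all states */
            .udiag_show   = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER,
        },
    };

    if (send(fd, &req, sizeof(req), 0) < 0) { perror("send"); return 1; }

    char buf[32768];
    ssize_t n = recv(fd, buf, sizeof(buf), 0);      /* first chunk only */
    printf("received %zd bytes of unix_diag messages\n", n);

    close(fd);
    return 0;
}

rtnl_dump_filter() does the full version of this: it keeps calling recvmsg(), walks each chunk with NLMSG_OK()/NLMSG_NEXT(), and hands every message to the callback, as rtnl_dump_filter_l() below shows.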

rtnl_send

Following it into lib/libnetlink.c:

int rtnl_send(struct rtnl_handle *rth, const void *buf, int len)
{
    return send(rth->fd, buf, len, 0);
}

rtnl_send simply calls send() to transmit the message.

rtnl_dump_filter

Following it into lib/libnetlink.c:

int rtnl_dump_filter_nc(struct rtnl_handle *rth,
             rtnl_filter_t filter,
             void *arg1, __u16 nc_flags)
{
    const struct rtnl_dump_filter_arg a[2] = {
        { .filter = filter, .arg1 = arg1, .nc_flags = nc_flags, },
        { .filter = NULL,   .arg1 = NULL, .nc_flags = 0, },
    };
 
    return rtnl_dump_filter_l(rth, a);
}

rtnl_dump_filter_nc() sets up the rtnl_dump_filter_arg filter entries and then calls rtnl_dump_filter_l().

int rtnl_dump_filter_l(struct rtnl_handle *rth,
               const struct rtnl_dump_filter_arg *arg)
{
    struct sockaddr_nl nladdr;
    struct iovec iov;
    struct msghdr msg = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    char buf[32768];
    int dump_intr = 0;
 
    iov.iov_base = buf;
    while (1) {
        int status;
        const struct rtnl_dump_filter_arg *a;
        int found_done = 0;
        int msglen = 0;
 
        iov.iov_len = sizeof(buf);
        status = recvmsg(rth->fd, &msg, 0);
 
        if (status < 0) {
            if (errno == EINTR || errno == EAGAIN)
                continue;
            fprintf(stderr, "netlink receive error %s (%d)\n",
                strerror(errno), errno);
            return -1;
        }
 
        if (status == 0) {
            fprintf(stderr, "EOF on netlink\n");
            return -1;
        }
 
        if (rth->dump_fp)
            fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);
 
        for (a = arg; a->filter; a++) {
            struct nlmsghdr *h = (struct nlmsghdr *)buf;
 
            msglen = status;
 
            while (NLMSG_OK(h, msglen)) {
                int err = 0;
 
                h->nlmsg_flags &= ~a->nc_flags;
 
                if (nladdr.nl_pid != 0 ||
                    h->nlmsg_pid != rth->local.nl_pid ||
                    h->nlmsg_seq != rth->dump)
                    goto skip_it;
 
                if (h->nlmsg_flags & NLM_F_DUMP_INTR)
                    dump_intr = 1;
 
                if (h->nlmsg_type == NLMSG_DONE) {
                    err = rtnl_dump_done(h);
                    if (err < 0)
                        return -1;
 
                    found_done = 1;
                    break; /* process next filter */
                }
 
                if (h->nlmsg_type == NLMSG_ERROR) {
                    rtnl_dump_error(rth, h);
                    return -1;
                }
 
                if (!rth->dump_fp) {
                    err = a->filter(&nladdr, h, a->arg1);
                    if (err < 0)
                        return err;
                }
 
skip_it:
                h = NLMSG_NEXT(h, msglen);
            }
        }
 
        if (found_done) {
            if (dump_intr)
                fprintf(stderr,
                    "Dump was interrupted and may be inconsistent.\n");
            return 0;
        }
 
        if (msg.msg_flags & MSG_TRUNC) {
            fprintf(stderr, "Message truncated\n");
            continue;
        }
        if (msglen) {
            fprintf(stderr, "!!!Remnant of size %d\n", msglen);
            exit(1);
        }
    }
}

rtnl_dump_filter_l() receives the data over netlink and then filters it according to the rtnl_dump_filter_arg entries.

Receiving the data:

struct sockaddr_nl nladdr;
struct iovec iov;
struct msghdr msg = {
    .msg_name = &nladdr,
    .msg_namelen = sizeof(nladdr),
    .msg_iov = &iov,
    .msg_iovlen = 1,
};
.....
status = recvmsg(rth->fd, &msg, 0);

Filtering the data:

for (a = arg; a->filter; a++) {
    struct nlmsghdr *h = (struct nlmsghdr *)buf;
    .....
    h->nlmsg_flags &= ~a->nc_flags;
    if (nladdr.nl_pid != 0 ||
                h->nlmsg_pid != rth->local.nl_pid ||
                h->nlmsg_seq != rth->dump)
                goto skip_it;
 
            if (h->nlmsg_flags & NLM_F_DUMP_INTR)
                dump_intr = 1;
 
            if (h->nlmsg_type == NLMSG_DONE) {
                err = rtnl_dump_done(h);
                if (err < 0)
                    return -1;
 
                found_done = 1;
                break; /* process next filter */
            }
            .......

As mentioned earlier, handle_netlink_request(f, &req.nlh, sizeof(req), unix_show_sock); ultimately causes the unix_show_sock callback to be invoked.

unix_show_sock

Let's look at the implementation of unix_show_sock:

static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
        void *arg)
{
    struct filter *f = (struct filter *)arg;
    struct unix_diag_msg *r = NLMSG_DATA(nlh);
    struct rtattr *tb[UNIX_DIAG_MAX+1];
    char name[128];
    struct sockstat stat = { .name = "*", .peer_name = "*" };
 
    parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(r+1),
             nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
 
    stat.type  = r->udiag_type;
    stat.state = r->udiag_state;
    stat.ino   = stat.lport = r->udiag_ino;
    stat.local.family = stat.remote.family = AF_UNIX;
 
    if (unix_type_skip(&stat, f))
        return 0;
 
    if (tb[UNIX_DIAG_RQLEN]) {
        struct unix_diag_rqlen *rql = RTA_DATA(tb[UNIX_DIAG_RQLEN]);
 
        stat.rq = rql->udiag_rqueue;
        stat.wq = rql->udiag_wqueue;
    }
    if (tb[UNIX_DIAG_NAME]) {
        int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
 
        memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
        name[len] = '\0';
        if (name[0] == '\0') {
            int i;
            for (i = 0; i < len; i++)
                if (name[i] == '\0')
                    name[i] = '@';
        }
        stat.name = &name[0];
        memcpy(stat.local.data, &stat.name, sizeof(stat.name));
    }
    if (tb[UNIX_DIAG_PEER])
        stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]);
 
    if (f->f && run_ssfilter(f->f, &stat) == 0)
        return 0;
 
    unix_stats_print(&stat, f);
 
    if (show_mem)
        print_skmeminfo(tb, UNIX_DIAG_MEMINFO);
    if (show_details) {
        if (tb[UNIX_DIAG_SHUTDOWN]) {
            unsigned char mask;
 
            mask = rta_getattr_u8(tb[UNIX_DIAG_SHUTDOWN]);
            printf(" %c-%c", mask & 1 ? '-' : '<', mask & 2 ? '-' : '>');
        }
    }
    printf("\n");
 
    return 0;
}

1. struct unix_diag_msg *r = NLMSG_DATA(nlh); and parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); extract the netlink payload and its attributes.

2. Parse the data and fill in the fields:

stat.type  = r->udiag_type;
stat.state = r->udiag_state;
stat.ino   = stat.lport = r->udiag_ino;
stat.local.family = stat.remote.family = AF_UNIX;
-------------------------------------------------
stat.rq = rql->udiag_rqueue;
stat.wq = rql->udiag_wqueue;

unix_stats_print

unix_stats_print(&stat, f); then prints the connection's state and addresses:

static void unix_stats_print(struct sockstat *s, struct filter *f)
{
    char port_name[30] = {};
 
    sock_state_print(s);
 
    sock_addr_print(s->name ?: "*", " ",
            int_to_str(s->lport, port_name), NULL);
    sock_addr_print(s->peer_name ?: "*", " ",
            int_to_str(s->rport, port_name), NULL);
 
    proc_ctx_print(s);
}

sock_state_print

Stepping into sock_state_print():

static void sock_state_print(struct sockstat *s)
{
    const char *sock_name;
    static const char * const sstate_name[] = {
        "UNKNOWN",
        [SS_ESTABLISHED] = "ESTAB",
        [SS_SYN_SENT] = "SYN-SENT",
        [SS_SYN_RECV] = "SYN-RECV",
        [SS_FIN_WAIT1] = "FIN-WAIT-1",
        [SS_FIN_WAIT2] = "FIN-WAIT-2",
        [SS_TIME_WAIT] = "TIME-WAIT",
        [SS_CLOSE] = "UNCONN",
        [SS_CLOSE_WAIT] = "CLOSE-WAIT",
        [SS_LAST_ACK] = "LAST-ACK",
        [SS_LISTEN] =   "LISTEN",
        [SS_CLOSING] = "CLOSING",
    };
 
    switch (s->local.family) {
    case AF_UNIX:
        sock_name = unix_netid_name(s->type);
        break;
    case AF_INET:
    case AF_INET6:
        sock_name = proto_name(s->type);
        break;
    case AF_PACKET:
        sock_name = s->type == SOCK_RAW ? "p_raw" : "p_dgr";
        break;
    case AF_NETLINK:
        sock_name = "nl";
        break;
    default:
        sock_name = "unknown";
    }
 
    if (netid_width)
        printf("%-*s ", netid_width,
               is_sctp_assoc(s, sock_name) ? "" : sock_name);
    if (state_width) {
        if (is_sctp_assoc(s, sock_name))
            printf("`- %-*s ", state_width - 3,
                   sctp_sstate_name[s->state]);
        else
            printf("%-*s ", state_width, sstate_name[s->state]);
    }
 
    printf("%-6d %-6d ", s->rq, s->wq);
}

Depending on s->local.family, the corresponding name is printed; the code needs little explanation since it is a simple switch/case. After everything has run, the output is:

Netid  State      Recv-Q Send-Q Local Address:Port                 Peer Address:Port               
u_seq  ESTAB      0      0      @00017 309855                * 309856

Notice that the default ss output contains no pid information. If we run ss -p instead, the result is:

Netid  State      Recv-Q Send-Q Local Address:Port                 Peer Address:Port
u_seq  ESTAB      0      0      @00017 309855                * 309856                users:(("code",pid=17009,fd=17))
u_seq  ESTAB      0      0      @00012 157444                * 157445                users:(("chrome",pid=5834,fd=10))

user_ent_hash_build

When the -p option is added, this code path runs:

case 'p':
    show_users++;
    user_ent_hash_build();
    break;

show_users becomes 1, and the program then calls user_ent_hash_build():

static void user_ent_hash_build(void)
{
    const char *root = getenv("PROC_ROOT") ? : "/proc/";
    struct dirent *d;
    char name[1024];
    int nameoff;
    DIR *dir;
    char *pid_context;
    char *sock_context;
    const char *no_ctx = "unavailable";
    static int user_ent_hash_build_init;
 
    /* If show_users & show_proc_ctx set only do this once */
    if (user_ent_hash_build_init != 0)
        return;
 
    user_ent_hash_build_init = 1;
 
    strlcpy(name, root, sizeof(name));
 
    if (strlen(name) == 0 || name[strlen(name)-1] != '/')
        strcat(name, "/");
 
    nameoff = strlen(name);
 
    dir = opendir(name);
    if (!dir)
        return;
 
    while ((d = readdir(dir)) != NULL) {
        struct dirent *d1;
        char process[16];
        char *p;
        int pid, pos;
        DIR *dir1;
        char crap;
 
        if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
            continue;
 
        if (getpidcon(pid, &pid_context) != 0)
            pid_context = strdup(no_ctx);
 
        snprintf(name + nameoff, sizeof(name) - nameoff, "%d/fd/", pid);
        pos = strlen(name);
        if ((dir1 = opendir(name)) == NULL) {
            free(pid_context);
            continue;
        }
 
        process[0] = '\0';
        p = process;
 
        while ((d1 = readdir(dir1)) != NULL) {
            const char *pattern = "socket:[";
            unsigned int ino;
            char lnk[64];
            int fd;
            ssize_t link_len;
            char tmp[1024];
 
            if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
                continue;
 
            snprintf(name+pos, sizeof(name) - pos, "%d", fd);
 
            link_len = readlink(name, lnk, sizeof(lnk)-1);
            if (link_len == -1)
                continue;
            lnk[link_len] = '\0';
 
            if (strncmp(lnk, pattern, strlen(pattern)))
                continue;
 
            sscanf(lnk, "socket:[%u]", &ino);
 
            snprintf(tmp, sizeof(tmp), "%s/%d/fd/%s",
                    root, pid, d1->d_name);
 
            if (getfilecon(tmp, &sock_context) <= 0)
                sock_context = strdup(no_ctx);
 
            if (*p == '\0') {
                FILE *fp;
 
                snprintf(tmp, sizeof(tmp), "%s/%d/stat",
                    root, pid);
                if ((fp = fopen(tmp, "r")) != NULL) {
                    if (fscanf(fp, "%*d (%[^)])", p) < 1)
                        ; /* ignore */
                    fclose(fp);
                }
            }
            user_ent_add(ino, p, pid, fd,
                    pid_context, sock_context);
            free(sock_context);
        }
        free(pid_context);
        closedir(dir1);
    }
    closedir(dir);
}

This parsing approach is similar to prg_cache_load in netstat: both walk the entries under /proc/<pid>/fd to obtain the inode numbers of the sockets. Once the pid, inode and fd are known, user_ent_add() is called.
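
The core of this walk can be shown with a small standalone program (a simplified sketch, not the actual ss/netstat code): it prints every socket inode held open by one pid.

/* Sketch: list the socket inodes owned by one pid (default pid 1). */
#include <stdio.h>
#include <dirent.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *pid = argc > 1 ? argv[1] : "1";
    char dirpath[64], lnkpath[128], lnk[64];

    snprintf(dirpath, sizeof(dirpath), "/proc/%s/fd", pid);
    DIR *dir = opendir(dirpath);
    if (!dir) { perror(dirpath); return 1; }

    struct dirent *d;
    while ((d = readdir(dir)) != NULL) {
        unsigned int ino;

        snprintf(lnkpath, sizeof(lnkpath), "%s/%s", dirpath, d->d_name);
        ssize_t len = readlink(lnkpath, lnk, sizeof(lnk) - 1);
        if (len < 0)
            continue;                   /* "." and ".." are not symlinks */
        lnk[len] = '\0';

        if (sscanf(lnk, "socket:[%u]", &ino) == 1)
            printf("fd %s -> socket inode %u\n", d->d_name, ino);
    }
    closedir(dir);
    return 0;
}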

user_ent_add

static void user_ent_add(unsigned int ino, char *process,
                    int pid, int fd,
                    char *proc_ctx,
                    char *sock_ctx)
{
    struct user_ent *p, **pp;
 
    p = malloc(sizeof(struct user_ent));
    if (!p) {
        fprintf(stderr, "ss: failed to malloc buffer\n");
        abort();
    }
    p->next = NULL;
    p->ino = ino;
    p->pid = pid;
    p->fd = fd;
    p->process = strdup(process);
    p->process_ctx = strdup(proc_ctx);
    p->socket_ctx = strdup(sock_ctx);
 
    pp = &user_ent_hash[user_ent_hashfn(ino)];
    p->next = *pp;
    *pp = p;
}

It takes the inode, pid and fd information and links a new entry into the corresponding hash bucket's list.

proc_ctx_print

When printing the results, the program calls proc_ctx_print():

static void proc_ctx_print(struct sockstat *s)
{
    char *buf;
 
    if (show_proc_ctx || show_sock_ctx) {
        if (find_entry(s->ino, &buf,
                (show_proc_ctx & show_sock_ctx) ?
                PROC_SOCK_CTX : PROC_CTX) > 0) {
            printf(" users:(%s)", buf);
            free(buf);
        }
    } else if (show_users) {
        if (find_entry(s->ino, &buf, USERS) > 0) {
            printf(" users:(%s)", buf);
            free(buf);
        }
    }
}

If show_users > 0, find_entry() is executed to look up the process information by inode number:

find_entry

static int find_entry(unsigned int ino, char **buf, int type)
{
    struct user_ent *p;
    int cnt = 0;
    char *ptr;
    char *new_buf;
    int len, new_buf_len;
    int buf_used = 0;
    int buf_len = 0;
 
    if (!ino)
        return 0;
 
    p = user_ent_hash[user_ent_hashfn(ino)];
    ptr = *buf = NULL;
    while (p) {
        if (p->ino != ino)
            goto next;
 
        while (1) {
            ptr = *buf + buf_used;
            switch (type) {
            case USERS:
                len = snprintf(ptr, buf_len - buf_used,
                    "(\"%s\",pid=%d,fd=%d),",
                    p->process, p->pid, p->fd);
                break;
            case PROC_CTX:
                len = snprintf(ptr, buf_len - buf_used,
                    "(\"%s\",pid=%d,proc_ctx=%s,fd=%d),",
                    p->process, p->pid,
                    p->process_ctx, p->fd);
                break;
            case PROC_SOCK_CTX:
                len = snprintf(ptr, buf_len - buf_used,
                    "(\"%s\",pid=%d,proc_ctx=%s,fd=%d,sock_ctx=%s),",
                    p->process, p->pid,
                    p->process_ctx, p->fd,
                    p->socket_ctx);
                break;
            default:
                fprintf(stderr, "ss: invalid type: %d\n", type);
                abort();
            }
 
            if (len < 0 || len >= buf_len - buf_used) {
                new_buf_len = buf_len + ENTRY_BUF_SIZE;
                new_buf = realloc(*buf, new_buf_len);
                if (!new_buf) {
                    fprintf(stderr, "ss: failed to malloc buffer\n");
                    abort();
                }
                *buf = new_buf;
                buf_len = new_buf_len;
                continue;
            } else {
                buf_used += len;
                break;
            }
        }
        cnt++;
next:
        p = p->next;
    }
    if (buf_used) {
        ptr = *buf + buf_used;
        ptr[-1] = '\0';
    }
    return cnt;
}

It walks the linked list starting at p = user_ent_hash[user_ent_hashfn(ino)]; to visit every node, using:

p = user_ent_hash[user_ent_hashfn(ino)];
ptr = *buf = NULL;
while (p) {
    if (p->ino != ino)
        goto next;

If a node's inode matches, the corresponding pid has been found, and the entry is formatted as follows:

switch (type) {
            case USERS:
                len = snprintf(ptr, buf_len - buf_used,
                    "(\"%s\",pid=%d,fd=%d),",
                    p->process, p->pid, p->fd);
                break;

The final output is:

Netid  State      Recv-Q Send-Q Local Address:Port                 Peer Address:Port               
u_seq  ESTAB      0      0      @00017 309855                * 309856                users:(("code",pid=17009,fd=17))

Summary

Because ss and netstat obtain their data in different ways, there is a large difference in execution efficiency. The two approaches also give us good ideas for collecting network data from a host ourselves.

netstat Source Code Debugging & Analysis

Overview

Most people probably still use netstat to inspect network state, but the reality is that netstat is gradually being replaced by other commands, and many newer Linux distributions no longer ship it by default. Take Ubuntu 18.04 as an example:

~ netstat 
zsh: command not found: netstat

As the article "difference between netstat and ss in linux?" puts it:

NOTE This program is obsolete. Replacement for netstat is ss.
Replacement for netstat -r is ip route. Replacement for netstat -i is
ip -s link. Replacement for netstat -g is ip maddr.

In plain terms: netstat is obsolete. Parts of its functionality have been taken over by the ip command, and there is also the more powerful ss. The ss command displays information about active sockets; it can be used to gather socket statistics and shows content similar to netstat, but its advantage is that it displays more detailed information about TCP and connection state, and it is faster and more efficient than netstat. netstat works simply by parsing /proc/net/tcp, so when a server has a very large number of socket connections, netstat becomes very slow. ss instead goes through tcp_diag, which fetches the network information from the kernel via netlink; that is why ss is more efficient and more complete.

The figure below shows the difference between ss and netstat in terms of what they monitor.

ss.png

ss obtains socket information directly, while netstat gathers its information by parsing the files under /proc/net/, covering Sockets, TCP/UDP, IP and Ethernet statistics.

To compare the efficiency of netstat and ss, run both on the same machine:

time ss
........
real    0m0.016s
user    0m0.001s
sys        0m0.001s
--------------------------------
time netstat
real    0m0.198s
user    0m0.009s
sys        0m0.011s

ss is clearly much more efficient than netstat.

About netstat

netstat is one of the tools in the net-tools package. The net-tools project provides the source code, so we can use it to look at how netstat is implemented.

netstat Source Code Debugging

After downloading net-tools, import it into CLion and create a CMakeLists.txt file with the following content:

cmake_minimum_required(VERSION 3.13)
project(test C)

set(BUILD_DIR .)

#add_executable()
add_custom_target(netstat command -c ${BUILD_DIR})

Change the compiler options on line 59 of the top-level Makefile to:

CFLAGS ?= -O0 -g3

netstat.png

Set up the build options as shown in the figure above.

That completes the setup for source-level debugging of netstat.

tcp show

When netstat is run without any arguments, the program first reaches tcp_info() at line 2317:

#if HAVE_AFINET
    if (!flag_arg || flag_tcp) {
        i = tcp_info();
        if (i)
        return (i);
    }

    if (!flag_arg || flag_sctp) {
        i = sctp_info();
        if (i)
        return (i);
    }
.........

Stepping into tcp_info():

static int tcp_info(void)
{
    INFO_GUTS6(_PATH_PROCNET_TCP, _PATH_PROCNET_TCP6, "AF INET (tcp)",
           tcp_do_one, "tcp", "tcp6");
}

The arguments are as follows:

_PATH_PROCNET_TCP, defined in lib/pathnames.h as #define _PATH_PROCNET_TCP "/proc/net/tcp"

_PATH_PROCNET_TCP6, defined in lib/pathnames.h as #define _PATH_PROCNET_TCP6 "/proc/net/tcp6"

tcp_do_one, a function pointer; the function is at line 1100 and part of its code is shown below:

static void tcp_do_one(int lnr, const char *line, const char *prot)
{
unsigned long rxq, txq, time_len, retr, inode;
int num, local_port, rem_port, d, state, uid, timer_run, timeout;
char rem_addr[128], local_addr[128], timers[64];
const struct aftype *ap;
struct sockaddr_storage localsas, remsas;
struct sockaddr_in *localaddr = (struct sockaddr_in *)&localsas;
struct sockaddr_in *remaddr = (struct sockaddr_in *)&remsas;
......

tcp_do_one() is the function that parses each line of /proc/net/tcp and /proc/net/tcp6. For the meaning of each field in /proc/net/tcp, see the extended section of the earlier article on osquery's process_open_sockets analysis.

INFO_GUTS6

#define INFO_GUTS6(file,file6,name,proc,prot4,prot6)    \
 char buffer[8192];                    \
 int rc = 0;                        \
 int lnr = 0;                        \
 if (!flag_arg || flag_inet) {                \
    INFO_GUTS1(file,name,proc,prot4)            \
 }                            \
 if (!flag_arg || flag_inet6) {                \
    INFO_GUTS2(file6,proc,prot6)            \
 }                            \
 INFO_GUTS3

INFO_GUTS6 is defined as a macro; depending on whether flag_inet (IPv4) or flag_inet6 (IPv6) is set, it expands into different calls. Let's continue the analysis with INFO_GUTS1(file,name,proc,prot4).

INFO_GUTS1

#define INFO_GUTS1(file,name,proc,prot)            \
  procinfo = proc_fopen((file));            \
  if (procinfo == NULL) {                \
    if (errno != ENOENT && errno != EACCES) {        \
      perror((file));                    \
      return -1;                    \
    }                            \
    if (!flag_noprot && (flag_arg || flag_ver))        \
      ESYSNOT("netstat", (name));            \
    if (!flag_noprot && flag_arg)            \
      rc = 1;                        \
  } else {                        \
    do {                        \
      if (fgets(buffer, sizeof(buffer), procinfo))    \
        (proc)(lnr++, buffer,prot);            \
    } while (!feof(procinfo));                \
    fclose(procinfo);                    \
  }

procinfo = proc_fopen((file)) obtains a file handle for /proc/net/tcp

fgets(buffer, sizeof(buffer), procinfo) reads the file line by line, storing each line in buffer

(proc)(lnr++, buffer, prot) parses buffer with the (proc) function, which is the tcp_do_one() function described above

tcp_do_one

" 14: 020110AC:B498 CF0DE1B9:4362 06 00000000:00000000 03:000001B2 00000000 0 0 0 3 0000000000000000这一行为例来说明tcp_do_one()函数的执行过程。

tcp_do_one_1.png

Since we are analyzing IPv4, the #if HAVE_AFINET6 block is skipped. The code then executes:

num = sscanf(line,
    "%d: %64[0-9A-Fa-f]:%X %64[0-9A-Fa-f]:%X %X %lX:%lX %X:%lX %lX %d %d %lu %*s\n",
         &d, local_addr, &local_port, rem_addr, &rem_port, &state,
         &txq, &rxq, &timer_run, &time_len, &retr, &uid, &timeout, &inode);
if (num < 11) {
    fprintf(stderr, _("warning, got bogus tcp line.\n"));
    return;
}

This parses the line and fills each column into the corresponding variable. Let's look at the definitions of some of these fields:

char rem_addr[128], local_addr[128], timers[64];
struct sockaddr_storage localsas, remsas;
struct sockaddr_in *localaddr = (struct sockaddr_in *)&localsas;
struct sockaddr_in *remaddr = (struct sockaddr_in *)&remsas;

On Linux, sockaddr_in and sockaddr_storage are defined as follows:

struct sockaddr {
   unsigned short    sa_family;    // address family, AF_xxx
   char              sa_data[14];  // 14 bytes of protocol address
};


struct  sockaddr_in {
    short  int  sin_family;                      /* Address family */
    unsigned  short  int  sin_port;       /* Port number */
    struct  in_addr  sin_addr;              /* Internet address */
    unsigned  char  sin_zero[8];         /* Same size as struct sockaddr */
};
/* Internet address. */
struct in_addr {
  uint32_t       s_addr;     /* address in network byte order */
};

struct sockaddr_storage {
    sa_family_t  ss_family;     // address family

    // all this is padding, implementation specific, ignore it:
    char      __ss_pad1[_SS_PAD1SIZE];
    int64_t   __ss_align;
    char      __ss_pad2[_SS_PAD2SIZE];
};

The code then continues with:

sscanf(local_addr, "%X", &localaddr->sin_addr.s_addr);
sscanf(rem_addr, "%X", &remaddr->sin_addr.s_addr);
localsas.ss_family = AF_INET;
remsas.ss_family = AF_INET;

local_addr is parsed with sscanf(..., "%X") into its hexadecimal value and stored in &localaddr->sin_addr.s_addr (the s_addr member of struct in_addr); the same is done for &remaddr->sin_addr.s_addr. The result looks like this:

saddr.png
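
The screenshot shows how the raw hex field turns into an address. Here is a tiny standalone demo of the same conversion, assuming a little-endian machine as in the example (the hex string is the 32-bit address as stored in memory, so on little-endian hosts the bytes handed to inet_ntop() are already in network order):

/* Convert "020110AC" from the example /proc/net/tcp line into dotted-quad
 * form, mirroring netstat's sscanf(local_addr, "%X", &...->sin_addr.s_addr). */
#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    struct in_addr in;
    char buf[INET_ADDRSTRLEN];

    sscanf("020110AC", "%X", &in.s_addr);
    inet_ntop(AF_INET, &in, buf, sizeof(buf));
    printf("%s\n", buf);    /* prints 172.16.1.2 on little-endian machines */
    return 0;
}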

addr_do_one

addr_do_one(local_addr, sizeof(local_addr), 22, ap, &localsas, local_port, "tcp");
addr_do_one(rem_addr, sizeof(rem_addr), 22, ap, &remsas, rem_port, "tcp");

Execution continues until it reaches addr_do_one(), which formats the local IP address and port as well as the remote IP address and port.

static void addr_do_one(char *buf, size_t buf_len, size_t short_len, const struct aftype *ap,
            const struct sockaddr_storage *addr,
            int port, const char *proto
)
{
    const char *sport, *saddr;
    size_t port_len, addr_len;

    saddr = ap->sprint(addr, flag_not & FLAG_NUM_HOST);
    sport = get_sname(htons(port), proto, flag_not & FLAG_NUM_PORT);
    addr_len = strlen(saddr);
    port_len = strlen(sport);
    if (!flag_wide && (addr_len + port_len > short_len)) {
        /* Assume port name is short */
        port_len = netmin(port_len, short_len - 4);
        addr_len = short_len - port_len;
        strncpy(buf, saddr, addr_len);
        buf[addr_len] = '\0';
        strcat(buf, ":");
        strncat(buf, sport, port_len);
    } else
          snprintf(buf, buf_len, "%s:%s", saddr, sport);
}

1. saddr = ap->sprint(addr, flag_not & FLAG_NUM_HOST); decides whether addr should be resolved to a host name. Since addr here is 127.0.0.1, the conversion yields localhost; FLAG_NUM_HOST corresponds to the --numeric-hosts option.

2. sport = get_sname(htons(port), proto, flag_not & FLAG_NUM_PORT); resolves the port to a service name in the same way, falling back to the number when it cannot be resolved; FLAG_NUM_PORT corresponds to the --numeric-ports option.

3. !flag_wide && (addr_len + port_len > short_len) determines whether the IP and port strings need to be truncated. flag_wide corresponds to -W, --wide (don't truncate IP addresses), and short_len is 22. (A worked example follows this list.)

4. snprintf(buf, buf_len, "%s:%s", saddr, sport); writes IP:PORT into buf.
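
As a worked example of the truncation branch (based only on the code above): with short_len = 22, a 25-character address and a 4-character port name exceed the limit, so port_len stays netmin(4, 22 - 4) = 4, addr_len becomes 22 - 4 = 18, and the address is cut to its first 18 characters before ":" and the port name are appended.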

output

The program then executes:

printf("%-4s  %6ld %6ld %-*s %-*s %-11s",
           prot, rxq, txq, (int)netmax(23,strlen(local_addr)), local_addr, (int)netmax(23,strlen(rem_addr)), rem_addr, _(tcp_state[state]));

which prints the result in the specified column format.

finish_this_one

Finally, finish_this_one(uid, inode, timers); is called.

static void finish_this_one(int uid, unsigned long inode, const char *timers)
{
    struct passwd *pw;

    if (flag_exp > 1) {
    if (!(flag_not & FLAG_NUM_USER) && ((pw = getpwuid(uid)) != NULL))
        printf(" %-10s ", pw->pw_name);
    else
        printf(" %-10d ", uid);
    printf("%-10lu",inode);
    }
    if (flag_prg)
    printf(" %-" PROGNAME_WIDTHs "s",prg_cache_get(inode));
    if (flag_selinux)
    printf(" %-" SELINUX_WIDTHs "s",prg_cache_get_con(inode));

    if (flag_opt)
    printf(" %s", timers);
    putchar('\n');
}

1. flag_exp corresponds to the -e option (-e, --extend: display other/more information). For example:

netstat -e 
Proto Recv-Q Send-Q Local Address           Foreign Address         State       User       Inode
tcp        0      0 localhost:6379          172.16.1.200:46702    ESTABLISHED redis      437788048

netstat
Proto Recv-Q Send-Q Local Address           Foreign Address         State      
tcp        0      0 localhost:6379          172.16.1.200:46702    ESTABLISHED

With -e, the User and Inode columns are shown as well. In this example the user name is resolved with getpwuid(); if it cannot be resolved, the numeric uid is displayed instead.

2. flag_prg corresponds to -p, --programs (display PID/Program name for sockets). For example:

netstat -pe
Proto Recv-Q Send-Q Local Address           Foreign Address         State       User       Inode      PID/Program name
tcp        0      0 localhost:6379          172.16.1.200:34062      ESTABLISHED redis      437672000  6017/redis-server *

netstat -e
Proto Recv-Q Send-Q Local Address           Foreign Address         State       User       Inode
tcp        0      0 localhost:6379          172.16.1.200:46702    ESTABLISHED redis      437788048

As you can see, prg_cache_get(inode) is used to find the corresponding PID and process information from the inode.

3. flag_selinux corresponds to -Z, --context (display SELinux security context for sockets).

prg_cache_get

Curious how a process is found from an inode, let's trace the implementation of prg_cache_get().

#define PRG_HASH_SIZE 211

#define PRG_HASHIT(x) ((x) % PRG_HASH_SIZE)

static struct prg_node {
    struct prg_node *next;
    unsigned long inode;
    char name[PROGNAME_WIDTH];
    char scon[SELINUX_WIDTH];
} *prg_hash[PRG_HASH_SIZE];

static const char *prg_cache_get(unsigned long inode)
{
    unsigned hi = PRG_HASHIT(inode);
    struct prg_node *pn;

    for (pn = prg_hash[hi]; pn; pn = pn->next)
    if (pn->inode == inode)
        return (pn->name);
    return ("-");
}

prg_hash stores the mapping between every inode number and its program, so given an inode number the corresponding program name can be looked up. But how is prg_hash initialized?

prg_cache_load

Run the program in debug mode with the -p argument:

netstat-p.png

The program reaches prg_cache_load(); at line 2289. Let's step into prg_cache_load().

Since the function is fairly long, let's analyze it in pieces.

Part 1: Obtaining the fds

#define PATH_PROC      "/proc"
#define PATH_FD_SUFF    "fd"
#define PATH_FD_SUFFl       strlen(PATH_FD_SUFF)
#define PATH_PROC_X_FD      PATH_PROC "/%s/" PATH_FD_SUFF
#define PATH_CMDLINE    "cmdline"
#define PATH_CMDLINEl       strlen(PATH_CMDLINE)
 
if (!(dirproc=opendir(PATH_PROC))) goto fail;
    while (errno = 0, direproc = readdir(dirproc)) {
    for (cs = direproc->d_name; *cs; cs++)
        if (!isdigit(*cs))
        break;
    if (*cs)
        continue;
    procfdlen = snprintf(line,sizeof(line),PATH_PROC_X_FD,direproc->d_name);
    if (procfdlen <= 0 || procfdlen >= sizeof(line) - 5)
        continue;
    errno = 0;
    dirfd = opendir(line);
    if (! dirfd) {
        if (errno == EACCES)
        eacces = 1;
        continue;
    }
    line[procfdlen] = '/';
    cmdlp = NULL;

1. dirproc=opendir(PATH_PROC); and errno = 0, direproc = readdir(dirproc) iterate over /proc to collect every pid;

2. procfdlen = snprintf(line,sizeof(line),PATH_PROC_X_FD,direproc->d_name); builds the /proc/<pid>/fd path for each process;

3. dirfd = opendir(line); opens a directory handle on /proc/<pid>/fd.

Part 2: Obtaining the inodes

while ((direfd = readdir(dirfd))) {
        /* Skip . and .. */
        if (!isdigit(direfd->d_name[0]))
            continue;
    if (procfdlen + 1 + strlen(direfd->d_name) + 1 > sizeof(line))
       continue;
    memcpy(line + procfdlen - PATH_FD_SUFFl, PATH_FD_SUFF "/",
        PATH_FD_SUFFl + 1);
    safe_strncpy(line + procfdlen + 1, direfd->d_name,
                    sizeof(line) - procfdlen - 1);
    lnamelen = readlink(line, lname, sizeof(lname) - 1);
    if (lnamelen == -1)
        continue;
        lname[lnamelen] = '\0';  /*make it a null-terminated string*/
 
        if (extract_type_1_socket_inode(lname, &inode) < 0)
            if (extract_type_2_socket_inode(lname, &inode) < 0)
            continue;

1. memcpy(line + procfdlen - PATH_FD_SUFFl, PATH_FD_SUFF "/", PATH_FD_SUFFl + 1); and safe_strncpy(line + procfdlen + 1, direfd->d_name, sizeof(line) - procfdlen - 1); build the full path of each descriptor entry, e.g. /proc/<pid>/fd/<fd>;

2. lnamelen = readlink(line, lname, sizeof(lname) - 1); reads the target of the fd symlink; fd entries are normally links, pointing either at a socket or at a pipe, as shown below:

$ ls -al /proc/1289/fd
total 0
dr-x------ 2 username username  0 May 25 15:45 .
dr-xr-xr-x 9 username username  0 May 25 09:11 ..
lr-x------ 1 username username 64 May 25 16:23 0 -> 'pipe:[365366]'
l-wx------ 1 username username 64 May 25 16:23 1 -> 'pipe:[365367]'
l-wx------ 1 username username 64 May 25 16:23 2 -> 'pipe:[365368]'
lr-x------ 1 username username 64 May 25 16:23 3 -> /proc/uptime

3. extract_type_1_socket_inode() extracts the inode number embedded in the link:

#define PRG_SOCKET_PFX    "socket:["
#define PRG_SOCKET_PFXl (strlen(PRG_SOCKET_PFX))
static int extract_type_1_socket_inode(const char lname[], unsigned long * inode_p) {
 
/* If lname is of the form "socket:[12345]", extract the "12345"
   as *inode_p.  Otherwise, return -1 as *inode_p.
   */
// reject names shorter than strlen("socket:[") + 3
if (strlen(lname) < PRG_SOCKET_PFXl+3) return(-1);
 
//memcmp() compares the first n bytes of the memory pointed to by s1 and s2
// check whether lname starts with "socket:["
if (memcmp(lname, PRG_SOCKET_PFX, PRG_SOCKET_PFXl)) return(-1);
if (lname[strlen(lname)-1] != ']') return(-1);  {
    char inode_str[strlen(lname + 1)];  /* e.g. "12345" */
    const int inode_str_len = strlen(lname) - PRG_SOCKET_PFXl - 1;
    char *serr;
 
    // copy out the inode number
    strncpy(inode_str, lname+PRG_SOCKET_PFXl, inode_str_len);
    inode_str[inode_str_len] = '\0';
    *inode_p = strtoul(inode_str, &serr, 0);
    if (!serr || *serr || *inode_p == ~0)
        return(-1);
}

4. Obtain the cmdline of the program:

if (!cmdlp) {
    if (procfdlen - PATH_FD_SUFFl + PATH_CMDLINEl >=sizeof(line) - 5)
        continue;
    safe_strncpy(line + procfdlen - PATH_FD_SUFFl, PATH_CMDLINE,sizeof(line) - procfdlen + PATH_FD_SUFFl);
fd = open(line, O_RDONLY);
if (fd < 0)
    continue;
cmdllen = read(fd, cmdlbuf, sizeof(cmdlbuf) - 1);
if (close(fd))
    continue;
if (cmdllen == -1)
    continue;
if (cmdllen < sizeof(cmdlbuf) - 1)
    cmdlbuf[cmdllen]='\0';
if (cmdlbuf[0] == '/' && (cmdlp = strrchr(cmdlbuf, '/')))
    cmdlp++;
else
    cmdlp = cmdlbuf;
}

Since cmdline can be read directly, there is no need to go through readlink() as with the fd entries; a plain read(fd, cmdlbuf, sizeof(cmdlbuf) - 1) is enough to get the file content.
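
As a quick standalone illustration (not net-tools code): /proc/<pid>/cmdline reads like an ordinary file, with the arguments separated by NUL bytes, so printing the buffer as a C string yields argv[0]; netstat then keeps only its basename as the program name.

/* Print argv[0] of the current process from /proc/self/cmdline. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
    char buf[4096];
    int fd = open("/proc/self/cmdline", O_RDONLY);
    if (fd < 0) { perror("open"); return 1; }

    ssize_t n = read(fd, buf, sizeof(buf) - 1);
    close(fd);
    if (n <= 0) return 1;
    buf[n] = '\0';

    printf("argv[0] = %s\n", buf);   /* arguments after the first '\0' are ignored */
    return 0;
}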

5. snprintf(finbuf, sizeof(finbuf), "%s/%s", direproc->d_name, cmdlp); concatenates the pid and cmdlp, producing something like 6017/redis-server *.

6. Finally the program calls prg_cache_add(inode, finbuf, "-"); to add the parsed inode and finbuf to the cache.

prg_cache_add

#define PRG_HASH_SIZE 211
#define PRG_HASHIT(x) ((x) % PRG_HASH_SIZE)
static struct prg_node {
    struct prg_node *next;
    unsigned long inode;
    char name[PROGNAME_WIDTH];
    char scon[SELINUX_WIDTH];
} *prg_hash[ ];
 
static void prg_cache_add(unsigned long inode, char *name, const char *scon)
{
    unsigned hi = PRG_HASHIT(inode);
    struct prg_node **pnp,*pn;
 
    prg_cache_loaded = 2;
    for (pnp = prg_hash + hi; (pn = *pnp); pnp = &pn->next) {
    if (pn->inode == inode) {
        /* Some warning should be appropriate here
           as we got multiple processes for one i-node */
        return;
    }
    }
    if (!(*pnp = malloc(sizeof(**pnp))))
    return;
    pn = *pnp;
    pn->next = NULL;
    pn->inode = inode;
    safe_strncpy(pn->name, name, sizeof(pn->name));
 
    {
    int len = (strlen(scon) - sizeof(pn->scon)) + 1;
    if (len > 0)
            safe_strncpy(pn->scon, &scon[len + 1], sizeof(pn->scon));
    else
            safe_strncpy(pn->scon, scon, sizeof(pn->scon));
    }
 
}

1. unsigned hi = PRG_HASHIT(inode); takes the inode modulo 211 to obtain the hash bucket index;

2. for (pnp = prg_hash + hi; (pn = *pnp); pnp = &pn->next) walks the linked list in that bucket of prg_hash until it reaches the end;

3. pn = *pnp; pn->next = NULL; pn->inode = inode; safe_strncpy(pn->name, name, sizeof(pn->name)); fills in the new entry and appends it to the end of the list.

So prg_hash is a global table of prg_node linked lists that stores the mapping between inode numbers and pid/cmdline.
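
For example, the inode 437788048 from the netstat -e output earlier hashes to 437788048 % 211 = 184, so that prg_node is stored in bucket 184 of prg_hash.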

prg_cache_get

static const char *prg_cache_get(unsigned long inode)
{
    unsigned hi = PRG_HASHIT(inode);
    struct prg_node *pn;
 
    for (pn = prg_hash[hi]; pn; pn = pn->next)
    if (pn->inode == inode)
        return (pn->name);
    return ("-");
}

Having analyzed prg_cache_add(), prg_cache_get() is easy to follow.

1. unsigned hi = PRG_HASHIT(inode); computes the hash bucket index from the inode number;

2. for (pn = prg_hash[hi]; pn; pn = pn->next) iterates over every node in that prg_hash list and returns the stored name when a node's inode matches the target inode.

Summary

From this brief analysis we can see that netstat simply walks the directories and files under /proc to gather its information. On a host where network connections are opened and closed frequently, netstat is clearly quite time-consuming.

osquery Source Code Reading: Analyzing process_open_sockets

Overview

The previous article analyzed the implementation of shell_history. That analysis showed that osquery's clean design makes the source code easy to read; shell_history itself is fairly simple, reading and parsing the contents of .bash_history to obtain the user's command history. This article analyzes process_open_sockets, whose implementation is more complex and requires a deeper understanding of Linux.

Usage

First, look at the definition of the process_open_sockets table:

table_name("process_open_sockets")
description("Processes which have open network sockets on the system.")
schema([
    Column("pid", INTEGER, "Process (or thread) ID", index=True),
    Column("fd", BIGINT, "Socket file descriptor number"),
    Column("socket", BIGINT, "Socket handle or inode number"),
    Column("family", INTEGER, "Network protocol (IPv4, IPv6)"),
    Column("protocol", INTEGER, "Transport protocol (TCP/UDP)"),
    Column("local_address", TEXT, "Socket local address"),
    Column("remote_address", TEXT, "Socket remote address"),
    Column("local_port", INTEGER, "Socket local port"),
    Column("remote_port", INTEGER, "Socket remote port"),
    Column("path", TEXT, "For UNIX sockets (family=AF_UNIX), the domain path"),
])
extended_schema(lambda: LINUX() or DARWIN(), [
    Column("state", TEXT, "TCP socket state"),
])
extended_schema(LINUX, [
    Column("net_namespace", TEXT, "The inode number of the network namespace"),
])
implementation("system/process_open_sockets@genOpenSockets")
examples([
  "select * from process_open_sockets where pid = 1",
])

A few of the columns deserve explanation:

  • fd, the file descriptor
  • socket, the inode number associated with the socket used for network communication
  • family, whether the socket is IPv4 or IPv6, shown as a number in the results
  • protocol, whether the socket is TCP or UDP.

Let's set up a simple reverse shell and then query the process_open_sockets table.

osquery> select pos.*,p.cwd,p.cmdline from process_open_sockets pos left join processes p where pos.family=2 and pos.pid=p.pid and net_namespace<>0;
+-------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+-----------------+-----------+
| pid   | fd | socket   | family | protocol | local_address | remote_address | local_port | remote_port | path | state       | net_namespace | cwd             | cmdline   |
+-------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+-----------------+-----------+
| 37272 | 15 | 52319299 | 2      | 6        | 192.168.2.142 | 172.22.0.176   | 43522      | 9091        |      | ESTABLISHED | 4026531956    | /home/xingjun   | osqueryi  |
| 91155 | 2  | 56651533 | 2      | 6        | 192.168.2.142 | 192.168.2.150  | 53486      | 8888        |      | ESTABLISHED | 4026531956    | /proc/79036/net | /bin/bash |
+-------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+-----------------+-----------+

The implementation of the process_open_sockets table is located in osquery/tables/networking/linux/process_open_sockets.cpp.

Analysis

The entire implementation of process_open_sockets lives in a single function, QueryData genOpenSockets(QueryContext &context).

The official comments describe the steps as follows:

Data for this table is fetched from 3 different sources and correlated.

1.Collect all sockets associated with each pid by going through all files under /proc/<pid>/fd and search for links of the type socket:[<inode>]. Extract the inode and fd (filename) and index it by inode number. The inode can then be used to correlate pid and fd with the socket information collected on step 3. The map generated in this step will only contain sockets associated with pids in the list, so it will also be used to filter the sockets later if pid_filter is set.

2.Collect the inode for the network namespace associated with each pid. Every time a new namespace is found execute step 3 to get socket basic information.

3.Collect basic socket information for all sockets under a specifc network namespace. This is done by reading through files under /proc/<pid>/net for the first pid we find in a certain namespace. Notice this will collect information for all sockets on the namespace not only for sockets associated with the specific pid, therefore only needs to be run once. From this step we collect the inodes of each of the sockets, and will use that to correlate the socket information with the information collect on steps 1 and 2.

In short, the steps are:

  1. Collect the fd information for each process, in particular the inode of each socket;
  2. Collect the inode of each process's network namespace;
  3. Read the information under /proc/<pid>/net and match it against the socket inodes from step 1 to find the network connections belonging to each pid.

For clarity, I have split the function into pieces and explain them step by step.

Obtaining the pid list

std::set <std::string> pids;
if (context.constraints["pid"].exists(EQUALS)) {
    pids = context.constraints["pid"].getAll(EQUALS);
}

bool pid_filter = !(pids.empty() ||
                    std::find(pids.begin(), pids.end(), "-1") != pids.end());

if (!pid_filter) {
    pids.clear();
    status = osquery::procProcesses(pids);
    if (!status.ok()) {
        VLOG(1) << "Failed to acquire pid list: " << status.what();
        return results;
    }
}
  • context.constraints["pid"].exists(EQUALS) and pid_filter check whether the SQL statement has a WHERE clause selecting specific pids;
  • status = osquery::procProcesses(pids); is then called to obtain the corresponding pids.

Following it into osquery/filesystem/linux/proc.cpp:procProcesses(std::set<std::string>& processes):

Status procProcesses(std::set<std::string>& processes) {
  auto callback = [](const std::string& pid,
                     std::set<std::string>& _processes) -> bool {
    _processes.insert(pid);
    return true;
  };

  return procEnumerateProcesses<decltype(processes)>(processes, callback);
}

Continue into osquery/filesystem/linux/proc.h:procEnumerateProcesses(UserData& user_data, bool (*callback)(const std::string&, UserData&)):

const std::string kLinuxProcPath = "/proc";
.....
template<typename UserData>
Status procEnumerateProcesses(UserData &user_data,bool (*callback)(const std::string &, UserData &)) {
    boost::filesystem::directory_iterator it(kLinuxProcPath), end;

    try {
        for (; it != end; ++it) {
            if (!boost::filesystem::is_directory(it->status())) {
                continue;
            }

            // See #792: std::regex is incomplete until GCC 4.9
            const auto &pid = it->path().leaf().string();
            if (std::atoll(pid.data()) <= 0) {
                continue;
            }

            bool ret = callback(pid, user_data);
            if (ret == false) {
                break;
            }
        }
    } catch (const boost::filesystem::filesystem_error &e) {
        VLOG(1) << "Exception iterating Linux processes: " << e.what();
        return Status(1, e.what());
    }

    return Status(0);
}
  • boost::filesystem::directory_iterator it(kLinuxProcPath), end; iterates over all entries under /proc;
  • const auto &pid = it->path().leaf().string(); ...; bool ret = callback(pid, user_data); checks via it->path().leaf().string() whether the entry name is numeric and then calls callback(pid, user_data);
  • the callback does _processes.insert(pid); return true;, recording every discovered pid in user_data.

Using a reverse shell as the example, the information queried via osqueryi looks like this:

osquery> select * from process_open_sockets where pid=14960; 
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+
| pid   | fd | socket | family | protocol | local_address | remote_address | local_port | remote_port | path | state       | net_namespace |
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+
| 14960 | 2  | 307410 | 2      | 6        | 192.168.2.156 | 192.168.2.145  | 51118      | 8888        |      | ESTABLISHED | 4026531956    |
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+

Obtaining the pid and fd information for each process

/* Use a set to record the namespaces already processed */
std::set <ino_t> netns_list;
SocketInodeToProcessInfoMap inode_proc_map;
SocketInfoList socket_list;
for (const auto &pid : pids) {
    /* Step 1 */
    status = procGetSocketInodeToProcessInfoMap(pid, inode_proc_map);
    if (!status.ok()) {
        VLOG(1) << "Results for process_open_sockets might be incomplete. Failed "
                    "to acquire socket inode to process map for pid "
                << pid << ": " << status.what();
    }

After collecting all the pids to query, the code calls status = procGetSocketInodeToProcessInfoMap(pid, inode_proc_map);, which, as the name suggests, obtains the socket inode numbers belonging to the process. Stepping into osquery/filesystem/linux/proc.cpp:procGetSocketInodeToProcessInfoMap():

Status procGetSocketInodeToProcessInfoMap(const std::string &pid,SocketInodeToProcessInfoMap &result) {
    auto callback = [](const std::string &_pid,
                        const std::string &fd,
                        const std::string &link,
                        SocketInodeToProcessInfoMap &_result) -> bool {
        /* We only care about sockets. But there will be other descriptors. */
        if (link.find("socket:[") != 0) {
            return true;
        }

        std::string inode = link.substr(8, link.size() - 9);
        _result[inode] = {_pid, fd};
        return true;
    };

    return procEnumerateProcessDescriptors<decltype(result)>(
            pid, result, callback);
}

The auto callback defines a callback function. Let's look at procEnumerateProcessDescriptors():

const std::string kLinuxProcPath = "/proc";
....
template<typename UserData>
Status procEnumerateProcessDescriptors(const std::string &pid,
                                        UserData &user_data,
                                        bool (*callback)(const std::string &pid,
                                                        const std::string &fd,
                                                        const std::string &link,
                                                        UserData &user_data)) {
    std::string descriptors_path = kLinuxProcPath + "/" + pid + "/fd";

    try {
        boost::filesystem::directory_iterator it(descriptors_path), end;

        for (; it != end; ++it) {
            auto fd = it->path().leaf().string();

            std::string link;
            Status status = procReadDescriptor(pid, fd, link);
            if (!status.ok()) {
                VLOG(1) << "Failed to read the link for file descriptor " << fd
                        << " of pid " << pid << ". Data might be incomplete.";
            }

            bool ret = callback(pid, fd, link, user_data);
            if (ret == false) {
                break;
            }
        }
    } catch (boost::filesystem::filesystem_error &e) {
        VLOG(1) << "Exception iterating process file descriptors: " << e.what();
        return Status(1, e.what());
    }

    return Status(0);
}

This code is very clear.

1. It iterates over /proc/<pid>/fd to obtain all the file descriptors; in this example, /proc/14960/fd:

1.jpg

2. It then invokes bool ret = callback(pid, fd, link, user_data);, i.e. the callback defined earlier in procGetSocketInodeToProcessInfoMap:

auto callback = [](const std::string &_pid,
                    const std::string &fd,
                    const std::string &link,
                    SocketInodeToProcessInfoMap &_result) -> bool {
    /* We only care about sockets. But there will be other descriptors. */
    if (link.find("socket:[") != 0) {
        return true;
    }

    std::string inode = link.substr(8, link.size() - 9);
    _result[inode] = {_pid, fd};
    return true;
};

This code is also very simple: it takes the link of each fd, checks whether it starts with socket:[, and if so extracts the inode. Since we are querying process_open_sockets, only links that are sockets matter; in this example the inode is 307410. The resulting structure in SocketInodeToProcessInfoMap is _result[inode] = {_pid, fd};, keyed by inode and containing the pid and fd.
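
For instance, with link = "socket:[307410]", link.substr(8, link.size() - 9) skips the 8-character prefix "socket:[" and drops the trailing "]", leaving the inode string "307410".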

Obtaining the namespace information for each process

After the previous step, status = procGetSocketInodeToProcessInfoMap(pid, inode_proc_map); has produced _result[inode] = {_pid, fd};, associating each inode with a pid and fd. The next step is to resolve the process's namespace information.

ino_t ns;
ProcessNamespaceList namespaces;
status = procGetProcessNamespaces(pid, namespaces, {"net"});
if (status.ok()) {
    ns = namespaces["net"];
} else {
    /* If namespaces are not available we allways set ns to 0 and step 3 will
        * run once for the first pid in the list.
        */
    ns = 0;
    VLOG(1) << "Results for the process_open_sockets might be incomplete."
                "Failed to acquire network namespace information for process "
                "with pid "
            << pid << ": " << status.what();
}

Following status = procGetProcessNamespaces(pid, namespaces, {"net"}); into osquery/filesystem/linux/proc.cpp:procGetProcessNamespaces():

const std::string kLinuxProcPath = "/proc";
...
Status procGetProcessNamespaces(const std::string &process_id,ProcessNamespaceList &namespace_list,std::vector <std::string> namespaces) {
    namespace_list.clear();
    if (namespaces.empty()) {
        namespaces = kUserNamespaceList;
    }
    auto process_namespace_root = kLinuxProcPath + "/" + process_id + "/ns";
    for (const auto &namespace_name : namespaces) {
        ino_t namespace_inode;
        auto status = procGetNamespaceInode(namespace_inode, namespace_name, process_namespace_root);
        if (!status.ok()) {
            continue;
        }
        namespace_list[namespace_name] = namespace_inode;
    }
    return Status(0, "OK");
}

The loop for (const auto &namespace_name : namespaces) then goes into process_namespace_root and calls procGetNamespaceInode(namespace_inode, namespace_name, process_namespace_root); to do the lookup. In this example namespaces is {"net"} and process_namespace_root is /proc/14960/ns.

Let's analyze procGetNamespaceInode(namespace_inode, namespace_name, process_namespace_root):

Status procGetNamespaceInode(ino_t &inode,const std::string &namespace_name,const std::string &process_namespace_root) {
    inode = 0;
    auto path = process_namespace_root + "/" + namespace_name;
    char link_destination[PATH_MAX] = {};
    auto link_dest_length = readlink(path.data(), link_destination, PATH_MAX - 1);
    if (link_dest_length < 0) {
        return Status(1, "Failed to retrieve the inode for namespace " + path);
    }

    // The link destination must be in the following form: namespace:[inode]
    if (std::strncmp(link_destination,
                        namespace_name.data(),
                        namespace_name.size()) != 0 ||
        std::strncmp(link_destination + namespace_name.size(), ":[", 2) != 0) {
        return Status(1, "Invalid descriptor for namespace " + path);
    }

    // Parse the inode part of the string; strtoull should return us a pointer
    // to the closing square bracket
    const char *inode_string_ptr = link_destination + namespace_name.size() + 2;
    char *square_bracket_ptr = nullptr;

    inode = static_cast<ino_t>(
            std::strtoull(inode_string_ptr, &square_bracket_ptr, 10));
    if (inode == 0 || square_bracket_ptr == nullptr ||
        *square_bracket_ptr != ']') {
        return Status(1, "Invalid inode value in descriptor for namespace " + path);
    }

    return Status(0, "OK");
}

Given the variables defined in procGetProcessNamespaces(), path is /proc/<pid>/ns/net, in this example /proc/14960/ns/net. The inode behind /proc/<pid>/ns/net is parsed via inode = static_cast<ino_t>(std::strtoull(inode_string_ptr, &square_bracket_ptr, 10));. In this example:

2.jpg

So the inode obtained is 4026531956. Back in procGetProcessNamespaces(), namespace_list[namespace_name] = namespace_inode; is executed, so namespace_list["net"] = 4026531956. Finally, ns = namespaces["net"]; yields ns = 4026531956.
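
A minimal standalone C demo (not osquery's code) of this lookup: readlink() on /proc/self/ns/net returns a string of the form net:[<inode>], and the number inside the brackets is the namespace inode that procGetNamespaceInode() parses out.

/* Read the network-namespace link of the current process and parse the
 * inode out of "net:[<inode>]". */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    char lnk[64];
    ssize_t n = readlink("/proc/self/ns/net", lnk, sizeof(lnk) - 1);
    if (n < 0) { perror("readlink"); return 1; }
    lnk[n] = '\0';

    unsigned long inode = strtoul(lnk + 5, NULL, 10);   /* skip "net:[" */
    printf("link=%s  net namespace inode=%lu\n", lnk, inode);
    return 0;
}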

Parsing the process's net information

// Linux proc protocol define to net stats file name.
const std::map<int, std::string> kLinuxProtocolNames = {
        {IPPROTO_ICMP,    "icmp"},
        {IPPROTO_TCP,     "tcp"},
        {IPPROTO_UDP,     "udp"},
        {IPPROTO_UDPLITE, "udplite"},
        {IPPROTO_RAW,     "raw"},
};
...
if (netns_list.count(ns) == 0) {
    netns_list.insert(ns);

    /* Step 3 */
    for (const auto &pair : kLinuxProtocolNames) {
        status = procGetSocketList(AF_INET, pair.first, ns, pid, socket_list);
        if (!status.ok()) {
            VLOG(1)
                    << "Results for process_open_sockets might be incomplete. Failed "
                        "to acquire basic socket information for AF_INET "
                    << pair.second << ": " << status.what();
        }

        status = procGetSocketList(AF_INET6, pair.first, ns, pid, socket_list);
        if (!status.ok()) {
            VLOG(1)
                    << "Results for process_open_sockets might be incomplete. Failed "
                        "to acquire basic socket information for AF_INET6 "
                    << pair.second << ": " << status.what();
        }
    }
    status = procGetSocketList(AF_UNIX, IPPROTO_IP, ns, pid, socket_list);
    if (!status.ok()) {
        VLOG(1)
                << "Results for process_open_sockets might be incomplete. Failed "
                    "to acquire basic socket information for AF_UNIX: "
                << status.what();
    }
}

For icmp/tcp/udp/udplite/raw, status = procGetSocketList(AF_INET|AF_INET6|AF_UNIX, pair.first, ns, pid, socket_list); is called. Here we only follow procGetSocketList(AF_INET, pair.first, ns, pid, socket_list); (where ns is 4026531956).

Status procGetSocketList(int family, int protocol,ino_t net_ns,const std::string &pid, SocketInfoList &result) {
    std::string path = kLinuxProcPath + "/" + pid + "/net/";

    switch (family) {
        case AF_INET:
            if (kLinuxProtocolNames.count(protocol) == 0) {
                return Status(1,"Invalid family " + std::to_string(protocol) +" for AF_INET familiy");
            } else {
                path += kLinuxProtocolNames.at(protocol);
            }
            break;

        case AF_INET6:
            if (kLinuxProtocolNames.count(protocol) == 0) {
                return Status(1,"Invalid protocol " + std::to_string(protocol) +" for AF_INET6 familiy");
            } else {
                path += kLinuxProtocolNames.at(protocol) + "6";
            }
            break;

        case AF_UNIX:
            if (protocol != IPPROTO_IP) {
                return Status(1,
                                "Invalid protocol " + std::to_string(protocol) +
                                " for AF_UNIX familiy");
            } else {
                path += "unix";
            }

            break;

        default:
            return Status(1, "Invalid family " + std::to_string(family));
    }

    std::string content;
    if (!osquery::readFile(path, content).ok()) {
        return Status(1, "Could not open socket information from " + path);
    }

    Status status(0);
    switch (family) {
        case AF_INET:
        case AF_INET6:
            status = procGetSocketListInet(family, protocol, net_ns, path, content, result);
            break;

        case AF_UNIX:
            status = procGetSocketListUnix(net_ns, path, content, result);
            break;
    }

    return status;
}

With the arguments family=AF_INET, protocol=tcp, net_ns=4026531956 and pid=14960, the execution goes as follows:

1. path += kLinuxProtocolNames.at(protocol); gives path /proc/14960/net/tcp;

2. osquery::readFile(path, content).ok() reads the file content, i.e. the content of /proc/14960/net/tcp. In this example it is:

sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
0: 00000000:1538 00000000:0000 0A 00000000:00000000 00:00000000 00000000    26        0 26488 1 ffff912c69c21740 100 0 0 10 0
1: 0100007F:0019 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 28721 1 ffff912c69c23640 100 0 0 10 0
2: 00000000:01BB 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27739 1 ffff912c69c21f00 100 0 0 10 0
3: 0100007F:18EB 00000000:0000 0A 00000000:00000000 00:00000000 00000000   988        0 25611 1 ffff912c69c207c0 100 0 0 10 0
4: 00000000:0050 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27737 1 ffff912c69c226c0 100 0 0 10 0
5: 017AA8C0:0035 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 29031 1 ffff912c69c23e00 100 0 0 10 0
6: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25754 1 ffff912c69c20f80 100 0 0 10 0
7: 0100007F:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25590 1 ffff912c69c20000 100 0 0 10 0
8: 9C02A8C0:C7AE 9102A8C0:22B8 01 00000000:00000000 00:00000000 00000000  1000

3. Execute procGetSocketListInet(family, protocol, net_ns, path, content, result);

Analysis

static Status procGetSocketListInet(int family,int protocol,ino_t net_ns,const std::string &path,const std::string &content,SocketInfoList &result) {
    // The system's socket information is tokenized by line.
    bool header = true;
    for (const auto &line : osquery::split(content, "\n")) {
        if (header) {
            if (line.find("sl") != 0 && line.find("sk") != 0) {
                return Status(1, std::string("Invalid file header for ") + path);
            }
            header = false;
            continue;
        }

        // The socket information is tokenized by spaces, each a field.
        auto fields = osquery::split(line, " ");
        if (fields.size() < 10) {
            VLOG(1) << "Invalid socket descriptor found: '" << line
                    << "'. Skipping this entry";
            continue;
        }

        // Two of the fields are the local/remote address/port pairs.
        auto locals = osquery::split(fields[1], ":");
        auto remotes = osquery::split(fields[2], ":");

        if (locals.size() != 2 || remotes.size() != 2) {
            VLOG(1) << "Invalid socket descriptor found: '" << line
                    << "'. Skipping this entry";
            continue;
        }

        SocketInfo socket_info = {};
        socket_info.socket = fields[9];
        socket_info.net_ns = net_ns;
        socket_info.family = family;
        socket_info.protocol = protocol;
        socket_info.local_address = procDecodeAddressFromHex(locals[0], family);
        socket_info.local_port = procDecodePortFromHex(locals[1]);
        socket_info.remote_address = procDecodeAddressFromHex(remotes[0], family);
        socket_info.remote_port = procDecodePortFromHex(remotes[1]);

        if (protocol == IPPROTO_TCP) {
            char *null_terminator_ptr = nullptr;
            auto integer_socket_state =
                    std::strtoull(fields[3].data(), &null_terminator_ptr, 16);
            if (integer_socket_state == 0 ||
                integer_socket_state >= tcp_states.size() ||
                null_terminator_ptr == nullptr || *null_terminator_ptr != 0) {
                socket_info.state = "UNKNOWN";
            } else {
                socket_info.state = tcp_states[integer_socket_state];
            }
        }

        result.push_back(std::move(socket_info));
    }

    return Status(0);
}

The overall execution flow is:

1. const auto &line : osquery::split(content, "\n") and auto fields = osquery::split(line, " ") parse the file: read it line by line and split each line on spaces;

2. Parse the information

SocketInfo socket_info = {};
socket_info.socket = fields[9];
socket_info.net_ns = net_ns;
socket_info.family = family;
socket_info.protocol = protocol;
socket_info.local_address = procDecodeAddressFromHex(locals[0], family);
socket_info.local_port = procDecodePortFromHex(locals[1]);
socket_info.remote_address = procDecodeAddressFromHex(remotes[0], family);
socket_info.remote_port = procDecodePortFromHex(remotes[1]);

Each line of /proc/14960/net/tcp is parsed and filled into a socket_info structure. However, not everything in /proc/14960/net/tcp is what we need, so the entries still have to be filtered; only the last entry, whose inode is 307410, is the one we are after.
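To make the parsing concrete, here is a small Python sketch (illustrative, not osquery code) that splits the example entry the same way procGetSocketListInet() does: whitespace-separated fields, "ADDR:PORT" pairs in hex, the TCP state in fields[3], the inode in fields[9]. The state table follows the standard /proc mapping discussed later in this article.

TCP_STATES = ["UNKNOWN", "ESTABLISHED", "SYN_SENT", "SYN_RECV", "FIN_WAIT1",
              "FIN_WAIT2", "TIME_WAIT", "CLOSE", "CLOSE_WAIT", "LAST_ACK",
              "LISTEN", "CLOSING"]          # index 0 is unused in /proc

line = ("9: 9C02A8C0:C7AE 9102A8C0:22B8 01 00000000:00000000 00:00000000 "
        "00000000  1000        0 307410 1 ffff912c374887c0 20 0 0 10 -1")

fields = line.split()
local_hex_addr, local_hex_port = fields[1].split(":")
remote_hex_addr, remote_hex_port = fields[2].split(":")

entry = {
    "socket": fields[9],                       # the inode, e.g. "307410"
    "state": TCP_STATES[int(fields[3], 16)],   # "01" -> ESTABLISHED
    "local_port": int(local_hex_port, 16),     # C7AE -> 51118
    "remote_port": int(remote_hex_port, 16),   # 22B8 -> 8888
}
print(entry)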

Getting the process's connection information

After /proc/14960/net/tcp has been parsed and the socket_info entries collected, execution continues in genOpenSockets().

    auto proc_it = inode_proc_map.find(info.socket);
    if (proc_it != inode_proc_map.end()) {
        r["pid"] = proc_it->second.pid;
        r["fd"] = proc_it->second.fd;
    } else if (!pid_filter) {
        r["pid"] = "-1";
        r["fd"] = "-1";
    } else {
        /* If we're filtering by pid we only care about sockets associated with
            * pids on the list.*/
        continue;
    }

    r["socket"] = info.socket;
    r["family"] = std::to_string(info.family);
    r["protocol"] = std::to_string(info.protocol);
    r["local_address"] = info.local_address;
    r["local_port"] = std::to_string(info.local_port);
    r["remote_address"] = info.remote_address;
    r["remote_port"] = std::to_string(info.remote_port);
    r["path"] = info.unix_socket_path;
    r["state"] = info.state;
    r["net_namespace"] = std::to_string(info.net_ns);

    results.push_back(std::move(r));
}

The key code is:

auto proc_it = inode_proc_map.find(info.socket);
if (proc_it != inode_proc_map.end()) {

By iterating over socket_list and checking whether the inode saved in inode_proc_map in the first step matches the inode in info, we know whether an entry belongs to the process we care about; matching rows are stored with results.push_back(std::move(r));.
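A minimal Python sketch of this join, using made-up dictionaries in place of osquery's internal structures: inode_proc_map plays the role of the /proc/<pid>/fd scan result, socket_list the parsed /proc/<pid>/net/tcp entries.

inode_proc_map = {"307410": {"pid": "14960", "fd": "2"}}
socket_list = [{"socket": "307410", "local_port": 51118, "remote_port": 8888,
                "state": "ESTABLISHED"}]

results = []
for info in socket_list:
    proc = inode_proc_map.get(info["socket"])
    if proc is None:
        continue                      # not a socket of the PIDs we filtered on
    row = {"pid": proc["pid"], "fd": proc["fd"], **info}
    results.append(row)

print(results)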
At this point we have collected all of the process's network connections, which osquery then renders:

osquery> select * from process_open_sockets where pid=14960; 
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+
| pid   | fd | socket | family | protocol | local_address | remote_address | local_port | remote_port | path | state       | net_namespace |
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+
| 14960 | 2  | 307410 | 2      | 6        | 192.168.2.156 | 192.168.2.145  | 51118      | 8888        |      | ESTABLISHED | 4026531956    |
+-------+----+--------+--------+----------+---------------+----------------+------------+-------------+------+-------------+---------------+

That is the complete flow osquery goes through when answering a query against the process_open_sockets table.

Extension

Linux's "everything is a file" philosophy means that system and process information can be obtained simply by reading certain files. So far we have looked at this only from osquery's perspective; this section describes the network- and process-related information Linux itself exposes.

/proc/net/tcp and /proc/net/udp record all TCP/UDP connections on the current system, and their content is exactly the same as what /proc/pid/net/tcp and /proc/pid/net/udp show.

The content of /proc/net/tcp is as follows:

sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
0: 00000000:1538 00000000:0000 0A 00000000:00000000 00:00000000 00000000    26        0 26488 1 ffff912c69c21740 100 0 0 10 0
1: 0100007F:0019 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 28721 1 ffff912c69c23640 100 0 0 10 0
2: 00000000:01BB 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27739 1 ffff912c69c21f00 100 0 0 10 0
3: 00000000:1F40 00000000:0000 0A 00000000:00000000 00:00000000 00000000  1000        0 471681 1 ffff912c37488f80 100 0 0 10 0
4: 0100007F:18EB 00000000:0000 0A 00000000:00000000 00:00000000 00000000   988        0 25611 1 ffff912c69c207c0 100 0 0 10 0
5: 00000000:0050 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27737 1 ffff912c69c226c0 100 0 0 10 0
6: 017AA8C0:0035 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 29031 1 ffff912c69c23e00 100 0 0 10 0
7: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25754 1 ffff912c69c20f80 100 0 0 10 0
8: 0100007F:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25590 1 ffff912c69c20000 100 0 0 10 0
9: 9C02A8C0:C7AE 9102A8C0:22B8 01 00000000:00000000 00:00000000 00000000  1000        0 307410 1 ffff912c374887c0 20 0 0 10 -1

The content of /proc/14960/net/tcp is as follows:

sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
0: 00000000:1538 00000000:0000 0A 00000000:00000000 00:00000000 00000000    26        0 26488 1 ffff912c69c21740 100 0 0 10 0
1: 0100007F:0019 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 28721 1 ffff912c69c23640 100 0 0 10 0
2: 00000000:01BB 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27739 1 ffff912c69c21f00 100 0 0 10 0
3: 0100007F:18EB 00000000:0000 0A 00000000:00000000 00:00000000 00000000   988        0 25611 1 ffff912c69c207c0 100 0 0 10 0
4: 00000000:0050 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 27737 1 ffff912c69c226c0 100 0 0 10 0
5: 017AA8C0:0035 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 29031 1 ffff912c69c23e00 100 0 0 10 0
6: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25754 1 ffff912c69c20f80 100 0 0 10 0
7: 0100007F:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 25590 1 ffff912c69c20000 100 0 0 10 0
8: 9C02A8C0:C7AE 9102A8C0:22B8 01 00000000:00000000 00:00000000 00000000  1000        0 307410 1 ffff912c374887c0 20 0 0 10 -1

Each column has a fixed meaning. Let's take the last entry, 9C02A8C0:C7AE 9102A8C0:22B8 01 00000000:00000000 00:00000000 00000000 1000 0 307410 1 ffff912c374887c0 20 0 0 10 -1, as an example.

1. local_address: the local IP and port, here 9C02A8C0:C7AE. 9C02A8C0 is the local IP in hexadecimal; in decimal it is 2617419968, which read directly as an IP address is 156.2.168.192, and reversing the byte order gives 192.168.2.156. C7AE in decimal is 51118. So the local endpoint of this connection is 192.168.2.156:51118 (see the decoding sketch after this list);

2. rem_address: the remote server's IP and port, here 9102A8C0:22B8. 9102A8C0 is the remote IP; decoded the same way as local_address it gives 192.168.2.145, and 22B8 gives port 8888;

3. st: the socket state, here 01. The different values of st mean:

  • 01: ESTABLISHED
  • 02: SYN_SENT
  • 03: SYN_RECV
  • 04: FIN_WAIT1
  • 05: FIN_WAIT2
  • 06: TIME_WAIT
  • 07: CLOSE
  • 08: CLOSE_WAIT
  • 09: LAST_ACK
  • 0A: LISTEN
  • 0B: CLOSING

So 01 in this example means the connection is in the ESTABLISHED state.

4. tx_queue: the amount of data in the send queue, here 00000000;

5. rx_queue: if the state is ESTABLISHED, the amount of data in the receive queue; if it is LISTEN, the length of the completed-connection queue;

6. tr: the timer type. 0 means no timer is running; 1 the retransmission timer; 2 the connection timer; 3 the TIME_WAIT timer; 4 the persist timer;

7. tm->when: the timer's expiration time;

8. retrnsmt: the number of retransmissions on timeout;

9. uid: the user id;

10. timeout: the number of TCP segments sent periodically by the persist or keepalive timer but not yet acknowledged, cleared once an ACK arrives;

11. inode: the inode corresponding to the socket;

12. 1 (no column header): the socket's reference count;

13. ffff912c374887c0 (no column header): the address of the corresponding sock structure;

14. 20 (no column header): the RTO, in clock_t units;

15. 0: the estimate used for delayed-ACK calculation;

16. 0: the pending quick-ACK count OR'd with the flag that says whether quick ACKs are enabled;

17. 10: the current congestion window size;

18. -1: shown as -1 if the slow-start threshold is at least 0x7fffffff, otherwise the slow-start threshold itself.

The article proc_net_tcp_decode also explains each field in detail.
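As a concrete illustration of the byte-order point in item 1 above, here is a small Python sketch (an illustration, not part of osquery) decoding the local_address column: the IPv4 address is stored as a little-endian 32-bit value, which is why the hex bytes look "reversed" compared to dotted-quad notation.

import socket
import struct

hex_addr, hex_port = "9C02A8C0:C7AE".split(":")
ip = socket.inet_ntoa(struct.pack("<I", int(hex_addr, 16)))  # 192.168.2.156
port = int(hex_port, 16)                                     # 51118
print(ip, port)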

By inspecting a specific pid's fd entries and checking for descriptors whose link target starts with socket:, we can tell whether the process has any network activity.

3.jpg

Once we have the inode of a socket, we can look it up in /proc/net/tcp to get, for example, the remote server's IP and port. In this way the socket inode ties a process to its network information.
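A minimal Python sketch of that association, using the example PID 14960: it reads the process's fd table and keeps only the descriptors that point at socket:[inode] targets, producing the inode-to-fd map that is then matched against /proc/net/tcp.

import os
import re

def socket_inodes(pid: int) -> dict:
    fd_dir = f"/proc/{pid}/fd"
    inodes = {}
    for fd in os.listdir(fd_dir):
        try:
            target = os.readlink(os.path.join(fd_dir, fd))
        except OSError:
            continue
        match = re.fullmatch(r"socket:\[(\d+)\]", target)
        if match:
            inodes[match.group(1)] = fd       # inode -> file descriptor
    return inodes

if __name__ == "__main__":
    print(socket_inodes(14960))               # e.g. {'307410': '2'}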

Summary

On the importance of reading source code.

That's all.

osquery source code analysis: shell_history

Overview

The previous two articles covered how to use osquery; this one analyzes its source code, focusing on the shell_history and process_open_sockets tables. Studying how these tables are implemented shows how osquery answers SQL queries with system information, and also deepens our understanding of Linux itself.

About the tables

shell_history exposes the shell history, while process_open_sockets records the host's current network activity. Example usage:

shell_history

osquery> select * from shell_history limit 3;
+------+------+-------------------------------------------------------------------+-----------------------------+
| uid  | time | command                                                           | history_file                |
+------+------+-------------------------------------------------------------------+-----------------------------+
| 1000 | 0    | pwd                                                               | /home/username/.bash_history |
| 1000 | 0    | ps -ef                                                            | /home/username/.bash_history |
| 1000 | 0    | ps -ef | grep java                                                | /home/username/.bash_history |
+------+------+-------------------------------------------------------------------+-----------------------------+

process_open_sockets showing the connection of a reverse shell:

osquery> select * from process_open_sockets order by pid desc limit 1;
+--------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+------------+---------------+
| pid    | fd | socket   | family | protocol | local_address | remote_address | local_port | remote_port | path | state      | net_namespace |
+--------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+------------+---------------+
| 115567 | 3  | 16467630 | 2      | 6        | 192.168.2.142 | 192.168.2.143  | 46368      | 8888        |      | ESTABLISH  | 0             |
+--------+----+----------+--------+----------+---------------+----------------+------------+-------------+------+------------+---------------+

osquery's code base is very cleanly organized: all table definitions live under specs, and all table implementations live under osquery/tables.

Taking shell_history as an example, its table definition is in specs/posix/shell_history.table:

table_name("shell_history")
description("A line-delimited (command) table of per-user .*_history data.")
schema([
    Column("uid", BIGINT, "Shell history owner", additional=True),
    Column("time", INTEGER, "Entry timestamp. It could be absent, default value is 0."),
    Column("command", TEXT, "Unparsed date/line/command history line"),
    Column("history_file", TEXT, "Path to the .*_history for this user"),
    ForeignKey(column="uid", table="users"),
])
attributes(user_data=True, no_pkey=True)
implementation("shell_history@genShellHistory")
examples([
    "select * from users join shell_history using (uid)",
])
fuzz_paths([
    "/home",
    "/Users",
])

shell_history.table defines all the relevant metadata: the entry point is the genShellHistory() function in shell_history.cpp, and it even gives an example SQL statement, select * from users join shell_history using (uid). shell_history.cpp is located at osquery/tables/system/posix/shell_history.cpp.

Likewise, the table definition of process_open_sockets is in specs/process_open_sockets.table and its implementation in osquery/tables/networking/[linux|freebsd|windows]/process_open_sockets.cpp. Since process_open_sockets exists on several platforms, each of linux/freebsd/windows has its own process_open_sockets.cpp; this article uses the linux one.

Implementation of shell_history

Background

Before the analysis, a few Linux basics. There are many different unix shells: bash, zsh, tcsh, sh and so on. bash is the most common, built into almost every unix-like system, while zsh adds more features on top of it. Whatever commands we type in a terminal go through one of these shells.

Running ls -all in a user's home directory reveals a .bash_history file, which records every command entered in the terminal. Similarly, zsh users will have a .zsh_history recording their commands.

The user's home directory may also contain a .bash_sessions directory. As this article describes:

A new folder (~/.bash_sessions/) is used to store HISTFILE’s and .session files that are unique to sessions. If $BASH_SESSION or $TERM_SESSION_ID is set upon launching the shell (i.e. if Terminal is resuming from a saved state), the associated HISTFILE is merged into the current one, and the .session file is ran. Session saving is facilitated by means of an EXIT trap being set for a function bash_update_session_state.

So .bash_sessions stores per-session HISTFILEs and .session files. If $BASH_SESSION or $TERM_SESSION_ID is set when the shell starts, the session's previous state is restored from them, which also means .bash_sessions contains *.history files recording the history of specific sessions.

Analysis

QueryData genShellHistory(QueryContext& context) {
    QueryData results;
    // Iterate over each user
    QueryData users = usersFromContext(context);
    for (const auto& row : users) {
        auto uid = row.find("uid");
        auto gid = row.find("gid");
        auto dir = row.find("directory");
        if (uid != row.end() && gid != row.end() && dir != row.end()) {
            genShellHistoryForUser(uid->second, gid->second, dir->second, results);
            genShellHistoryFromBashSessions(uid->second, dir->second, results);
        }
    }

    return results;
}

Let's analyze genShellHistory(), the entry function in shell_history.cpp:

It iterates over all users and obtains each user's uid, gid and directory, then calls genShellHistoryForUser() to collect that user's shell history; genShellHistoryFromBashSessions() plays a similar role.

genShellHistoryForUser():

void genShellHistoryForUser(const std::string& uid, const std::string& gid, const std::string& directory, QueryData& results) {
    auto dropper = DropPrivileges::get();
    if (!dropper->dropTo(uid, gid)) {
        VLOG(1) << "Cannot drop privileges to UID " << uid;
        return;
    }

    for (const auto& hfile : kShellHistoryFiles) {
        boost::filesystem::path history_file = directory;
        history_file /= hfile;
        genShellHistoryFromFile(uid, history_file, results);
    }
}

Note that before doing anything else it calls:

auto dropper = DropPrivileges::get();
if (!dropper->dropTo(uid, gid)) {
    VLOG(1) << "Cannot drop privileges to UID " << uid;
    return;
}

to drop privileges to the given gid and uid. Why is that necessary? A foreign netizen I asked gave a very thorough answer:

Think about a scenario where you are a malicious user and you spotted a vulnerability(buffer overflow) which none of us has. In the code (osquery which is running usually with root permission) you also know that history files(controlled by you) are being read by code(osquery). Now you stored a shell code (a code which is capable of destroying anything in the system)such a way that it would overwrite the saved rip. So once the function returns program control is with the injected code(shell code) with root privilege. With dropping privilege you reduce the chance of putting entire system into danger.

There are other mitigation techniques (e.g. stack guard) to avoid above scenario but multiple defenses are required

In short, osquery normally runs as root. If an attacker planted malicious shellcode in .bash_history and a vulnerability were triggered while osquery read the file, the attacker could gain root. Dropping privileges greatly reduces that risk.

/**
* @brief The privilege/permissions dropper deconstructor will restore
* effective permissions.
*
* There should only be a single drop of privilege/permission active.
*/
virtual ~DropPrivileges();

As the comment shows, when the DropPrivileges object is destructed the effective permissions are restored.
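For illustration only, a rough Python sketch of the same idea (temporarily dropping the effective uid/gid around the read, then restoring); this is not the osquery DropPrivileges implementation, and it only works when the process starts as root.

import os

def read_as_user(path: str, uid: int, gid: int) -> str:
    saved_uid, saved_gid = os.geteuid(), os.getegid()
    os.setegid(gid)            # drop the group first, then the user
    os.seteuid(uid)
    try:
        with open(path) as f:
            return f.read()
    finally:
        os.seteuid(saved_uid)  # restore the original effective privileges
        os.setegid(saved_gid)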

It then iterates over kShellHistoryFiles and runs genShellHistoryFromFile() on each. kShellHistoryFiles was defined earlier as:

const std::vector<std::string> kShellHistoryFiles = {
    ".bash_history", ".zsh_history", ".zhistory", ".history", ".sh_history",
};

kShellHistoryFiles simply lists the common files shells use to record history. Finally, genShellHistoryFromFile() reads each history file and parses its data.

void genShellHistoryFromFile(const std::string& uid, const boost::filesystem::path& history_file, QueryData& results) {
    std::string history_content;
    if (forensicReadFile(history_file, history_content).ok()) {
        auto bash_timestamp_rx = xp::sregex::compile("^#(?P<timestamp>[0-9]+)$");
        auto zsh_timestamp_rx = xp::sregex::compile("^: {0,10}(?P<timestamp>[0-9]{1,11}):[0-9]+;(?P<command>.*)$");
        std::string prev_bash_timestamp;
        for (const auto& line : split(history_content, "\n")) {
            xp::smatch bash_timestamp_matches;
            xp::smatch zsh_timestamp_matches;

            if (prev_bash_timestamp.empty() &&
                xp::regex_search(line, bash_timestamp_matches, bash_timestamp_rx)) {
                prev_bash_timestamp = bash_timestamp_matches["timestamp"];
                continue;
            }

            Row r;

            if (!prev_bash_timestamp.empty()) {
                r["time"] = INTEGER(prev_bash_timestamp);
                r["command"] = line;
                prev_bash_timestamp.clear();
            } else if (xp::regex_search(
                    line, zsh_timestamp_matches, zsh_timestamp_rx)) {
                std::string timestamp = zsh_timestamp_matches["timestamp"];
                r["time"] = INTEGER(timestamp);
                r["command"] = zsh_timestamp_matches["command"];
            } else {
                r["time"] = INTEGER(0);
                r["command"] = line;
            }

            r["uid"] = uid;
            r["history_file"] = history_file.string();
            results.push_back(r);
        }
    }
}

The logic is very clear:

  1. forensicReadFile(history_file, history_content) reads the file content.
  2. The regular expressions bash_timestamp_rx and zsh_timestamp_rx are defined to parse the history format; for (const auto& line : split(history_content, "\n")) walks the file line by line and matches each line against them.
  3. Row r; ...; r["history_file"] = history_file.string(); results.push_back(r); writes the parsed content into a Row and returns it.

That completes the parsing for shell_history: running select * from shell_history follows the flow above and returns every historical command.
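A small Python sketch (illustrative, not osquery code) applying the same two regular expressions to made-up sample lines: bash writes "#<epoch>" on its own line before the command, while zsh's extended history uses ": <epoch>:<duration>;<command>".

import re

bash_timestamp_rx = re.compile(r"^#(?P<timestamp>[0-9]+)$")
zsh_timestamp_rx = re.compile(r"^: {0,10}(?P<timestamp>[0-9]{1,11}):[0-9]+;(?P<command>.*)$")

print(bash_timestamp_rx.match("#1544018394").group("timestamp"))   # 1544018394
m = zsh_timestamp_rx.match(": 1544018394:0;ps -ef | grep java")
print(m.group("timestamp"), m.group("command"))                    # 1544018394 ps -ef | grep java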

As for genShellHistoryFromBashSessions():

void genShellHistoryFromBashSessions(const std::string &uid,const std::string &directory,QueryData &results) {
    boost::filesystem::path bash_sessions = directory;
    bash_sessions /= ".bash_sessions";

    if (pathExists(bash_sessions)) {
        bash_sessions /= "*.history";
        std::vector <std::string> session_hist_files;
        resolveFilePattern(bash_sessions, session_hist_files);

        for (const auto &hfile : session_hist_files) {
            boost::filesystem::path history_file = hfile;
            genShellHistoryFromFile(uid, history_file, results);
        }
    }
}

genShellHistoryFromBashSessions() collects history in a simpler way:

  1. Resolve all files matching .bash_sessions/*.history;
  2. Call genShellHistoryFromFile(uid, history_file, results); on each of them, just as before (a minimal sketch of this lookup follows).
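A minimal Python sketch of that lookup, assuming the usual ~/.bash_sessions layout; each file returned here would then go through the regular history parser.

import glob
import os

def session_history_files(home_directory: str):
    pattern = os.path.join(home_directory, ".bash_sessions", "*.history")
    return glob.glob(pattern)

print(session_history_files(os.path.expanduser("~")))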

Summary

Reading the source of well-written open-source software not only teaches the relevant technical details but also exposes you to its design philosophy. A white hat with the ability to learn fast can afford no weak planks; what they have is a large number of solid planks and a few long ones.

Exploring front-end dynamic mutation to counter Selenium-style automation tools

0x01 Introduction

This is not a security-exploitation article. If you care about business security, fighting bonus hunters, or anti-crawling, read on at your leisure.

The biggest headache in business security is the bonus-hunting ("wool pulling") carried out by all kinds of automation tools. Among the many weapons in the bonus hunters' arsenal, the ones that behave most like real humans are automations built on the Selenium library driving real browsers. Selenium's webdriver supports the mainstream browsers (chrome, firefox, ie, opera, phantomjs, safari) as well as their headless modes. For more background see: https://www.cnblogs.com/zhaof/p/6953241.html

0x02 A demo of an automation tool

Before pulling wool, the bonus hunters prepare their automation. For a ticket-grabbing campaign, for example, the tool must open the browser, open the login page, fill in the account and password, click to log in, open the campaign page and click to grab tickets, all automatically.

The demo below uses Selenium with the Chrome browser to automate logging in to (or credential-stuffing / brute-forcing) the test site demo.testfire.net.

201812051544014778183194.png

The code above first uses Selenium's webdriver to open the local Chrome browser and then calls its API to navigate straight to the login URL. To fill in the account and password it must first find where to type. Selenium offers several ways to find elements: by id, name, xpath, css selector, or even by text. Once the element is located it can be clicked, typed into, dragged, and so on.
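For reference, a rough Python sketch of the kind of script the screenshot shows. The URL path and the element names uid/passw/btnSubmit are assumptions for illustration, and it uses the Selenium 3-style API that the article's other examples use; adjust the locators to the real page.

from selenium import webdriver

driver = webdriver.Chrome()
driver.get("http://demo.testfire.net/login.jsp")

# Hypothetical field names -- replace with the real ones on the target page.
driver.find_element_by_name("uid").send_keys("admin")
driver.find_element_by_name("passw").send_keys("password123")
driver.find_element_by_name("btnSubmit").click()

print(driver.title)   # check whether the login landed on the account page
driver.quit()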

0x03 Some background knowledge

Locating elements is the core of UI automation: to operate on an object you must first identify it. An object is like a person, with all sorts of attributes; we can find a person by ID number or name, or by the street, floor and door number where they live.

Likewise, an object's attributes let us find it. webdriver provides a series of locator strategies, the common ones being:

  • id
  • name
  • class name
  • link text
  • partial link text
  • tag name
  • xpath
  • css selector

Taking the Baidu search page as an example, the python calls for the different locator strategies are as follows.

The input tag of the Baidu search box is:

<input id="kw" name="wd" class="s_ipt" value="" maxlength="255" autocomplete="off">

With Selenium, the element can then be located in any of the following ways:

1. By id

u = dr.find_element_by_id('kw')

2. By name

u = dr.find_element_by_name('wd')

3. By class name

s = dr.find_element_by_class_name('s_ipt')

4. By xpath

s = dr.find_element_by_xpath('//*[@id="kw"]')

5. By css selector

s = dr.find_element_by_css_selector('#kw')

These five are the locator strategies most commonly used when writing Selenium automation (the rest are not listed here). Clearly, automation tools depend heavily on the id, name, class and similar values of the input tag when looking up elements.

Now look at the element-lookup interfaces javascript provides in the browser:

201812051544016163471346.png

They, too, find elements by id, name and class values.

0x04 Thinking about countermeasures

Attacks that drive a real browser to simulate human operations are hard to stop with traditional anti-crawling JS or UA blacklists, because the requests really do come from a genuine browser. Recall from above that this kind of attack works by having webdriver drive the browser: it injects its own js into every page and uses that js to perform the automated clicks and inputs. So the first idea is to detect the js injected by webdriver, which is the approach some anti-automation products take today. Defense product A injects a detection js into every page and identifies automation tools by the function names, variable names and so on that webdriver's injection brings along. This does work, but it is the same game as antivirus versus malware: signature-based detection. Once the signature function names or variable names change, say because the attacker rebuilt the webdriver driver and altered its fingerprints, the defense product is helpless. That is the same dilemma antivirus has always faced.

Let's change the angle. To operate on a page via js you still have to find the element first, through DOM lookups such as document.getElementById or document.getElementsByName, most commonly by ID or name. Even if webdriver's js injection succeeds, dynamically changing the tags' id, name and class name values makes the injected script's element lookups fail, which also thwarts the automation.

New idea 1: dynamically mutate tag attributes to disrupt js element lookup

For a defense product, phase one keeps the existing detection of webdriver-injected js. It then randomly switches to phase two: let the webdriver js pass, but dynamically obfuscate the ID, NAME and class name values of the key tags.

When should the values change? We can start as soon as the upstream returns its first response, and afterwards hook some key events so the page actively mutates whenever those events fire.

New idea 2: randomly insert invisible duplicate tags

While analyzing Selenium I found that invisible tags interfere with its element location; this has come up in Selenium's GitHub issues, and newer Selenium versions can find disabled tags. So we can craft tags with the same ID, name and other attributes but hidden and disabled: they successfully disturb its element location without any visible change in the UI. This idea targets automation scripts that locate elements not by ID or NAME but by xpath or css selector.

For example:

201812051544019146607737.png

An automation tool can locate the search box above via xpath, in which case the dynamic ID/name trick no longer interferes.

In the screenshot, the hidden element above the search box is the injected decoy tag.

Before insertion, the bonus-hunting script can avoid id/name and locate the element with this selector:

#main > header > div.header-content.clearfix > div.header-search-module > div.header-search-block > input

After the hidden tag is inserted, the script has to adjust its selector to find the real input:

#main > header > div.header-content.clearfix > div.header-search-module > div.header-search-block > input:nth-child(2)

The injected tag is hidden and disabled: it is invisible in the UI and is not submitted with the form to the server, yet it neatly disrupts the bonus-hunting script's lookup.

New idea 3: insert invisible or extra characters into text

The ideas above disrupt nearly all of webdriver's common locator strategies; what remains is locating by link text. Selenium can find elements by the full or partial text of a tag, so inserting special characters that do not visibly change the UI into the original text defeats this strategy as well.

New idea 4: turn passive defense into active counterattack

The counterattack here exploits browser quirks and bugs, even vulnerabilities, in the automation tools themselves. This one is fun: by browsing the GitHub issues of Selenium, Chromedriver and geckodriver we can collect the bugs users have hit during automated testing (wool pulling), deliberately construct those conditions, and make the bonus-hunting program crash on its own, blocking the campaign.

0x05 Demos

Enough talk, you must be thinking "show me the code...". Based on the ideas above, here are some demos. I use mitmproxy as a reverse proxy and write a python script that modifies the origin's responses to insert specific JS. The injected JS implements the ideas above and hooks some Window events such as onkeypress, onclick and onchange; when these fire, the page changes dynamically in ways that are invisible in the UI but break the automation performed by Selenium's injected JS.

Demo 1: dynamically changing id and name values

The mitmproxy script below injects our JS into the login page of a specific domain, placed at the end of the body to minimize the impact on the original page.

201812051544017569103738.png

The injected JS is too long to include in full; it mainly does the following:

It walks the input tags via document.getElementsByTagName, records every tag's id, name and class name, defines a change function that generates new random id/name values, and hooks a few events so that each time one fires, change performs another round of mutation.

Finally, of course, the form's onsubmit must be hooked as well: first restore the real id, name and class name values, then let the normal submission proceed.
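A rough sketch of such an injection, assuming the mitmproxy addon API and using a toy JS payload; a real payload would be more careful about scoping to the login form and about which events it hooks.

from mitmproxy import http

JS_PAYLOAD = """
<script>
(function () {
  var originals = [];
  var inputs = document.getElementsByTagName('input');
  for (var i = 0; i < inputs.length; i++) {
    originals.push({el: inputs[i], id: inputs[i].id, name: inputs[i].name});
  }
  function change() {            // randomize id/name on every hooked event
    for (var i = 0; i < originals.length; i++) {
      var r = Math.random().toString(36).slice(2);
      originals[i].el.id = 'x' + r;
      originals[i].el.name = 'x' + r;
    }
  }
  function restore() {           // put the real values back before submit
    for (var i = 0; i < originals.length; i++) {
      originals[i].el.id = originals[i].id;
      originals[i].el.name = originals[i].name;
    }
  }
  document.addEventListener('keypress', change);
  document.addEventListener('click', change);
  document.addEventListener('submit', restore, true);
})();
</script>
"""

class InjectDefense:
    def response(self, flow: http.HTTPFlow) -> None:
        # Only touch HTML responses; a real deployment would also match the domain/path.
        if "text/html" in flow.response.headers.get("content-type", ""):
            flow.response.text = flow.response.text.replace("</body>", JS_PAYLOAD + "</body>")

addons = [InjectDefense()]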

The effect after injecting the JS:

Every time the page fires an event, the key input tags' id and name change once more.

201812051544018018166191.png

A real human completes the normal business flow without noticing anything.

201812051544018322300778.png

Testing with the automation tool:

First, without mitmproxy's JS injection, the automated login succeeds, as shown below.

201812051544018394910059.png

Then, visiting the proxied address with the JS injected, the automated login fails.

201812051544018454832802.png

From the console output we can see that while the tool was working it triggered some events, the id and name values changed dynamically, and the program could no longer locate the fields to enter its data.

Demo 2: inserting invisible duplicate tags

After the JS is injected, hidden, disabled tags with identical id, name and other attributes are inserted:

201812051544019562214217.png

Normal business must of course remain unaffected:

201812051544019625790680.png

Testing with webdriver: below, the Firefox automation is successfully disrupted; it reports that the element it found is not usable.

201812051544019677488025.png

Demo 3: inserting invisible or extra characters into text

Selenium-style tools support locating by link text: sometimes the target is neither an input box nor a button but a text link, which can be located by its text.

In that case the automation does not use id, name or an xpath structure at all; it matches the text directly. To interfere, what we can do is dynamically modify the text at these key spots so the program can no longer use it for location.

For example, in the test code below the program opens the page, finds the login link via "Sign In", opens it, finds the merchant contact via "Contact Us", then reaches the order page via "online form", and so on; the whole automated sequence is driven purely by page text. None of the previous ideas can interfere with it. If we change "Sign In" on the page to something like "Sign . In", making the smallest UI change we can, we may well break the automation as well.

201812051544020237881426.png

The successful interference with Webdriver looks like this:

201812051544020448896634.png

This is only a proof of concept for the idea. Doing it well is not easy, because I do not yet know how to modify the text so that it differs programmatically while the browser UI shows no visible difference.

Advice from front-end experts would be greatly appreciated.

Demo 4: turning passive defense into active counterattack

I have not fully worked out this idea yet; I do not have a good trick for striking back at the client with JS. Here are two bugs for inspiration; how to weaponize them nicely with JS is something I would love to discuss: https://github.com/SeleniumHQ/selenium/issues/5840 and https://github.com/mozilla/geckodriver/issues/1228

0x06 Summary

The ideas in this article have been proposed before. Only after finishing it did I learn that back in 2016 a Ctrip engineer had already covered them in 《关于反爬虫,看这一篇就够了》: https://blog.csdn.net/u013886628/article/details/51820221. A screenshot of the relevant content:

201812051544021106525820.png

Clearly Ctrip's anti-crawling efforts go back a long way. Forgive this junior's humble attempt; the road ahead is long, and I will keep searching high and low.