-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1 iQEbBAABAgAGBQJX6kSEAAoJEO8Ells5jWIRKAkH9iMMzN9USOroQIWmiyMf5S7F mlsSeSccv+U5gA6wCJooA0dwMnAFnxJ3rTcV6BEL0jE0cVHanR61eDfpeOC0lKXw NUWc91Bf4Epg0cTk9fV6yv6xZOcuN/twukrQIEZjfldpbP0ba+WoBx3x0sdYen+M Xjaix011CUEx5VmVMx8g/LbnM8s1WO+CjEjIpWAas+1M68P+elne5nOaTaj+FyzV E9BkUkcXd5ByzikYRykgS/OJGRd7S+BBSFluISekwGjTcppRccAwZsGkgYXRrF3U 1g1LOT2xuz777uP7hBqZQRyZIAaOiLY89WUFuCL1BBLbbkAnT799J/e/n6sRSg== =2gpR -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging # gpg: Signature made Tue 27 Sep 2016 11:05:56 BST # gpg: using RSA key 0xEF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * remotes/jasowang/tags/net-pull-request: (27 commits) imx_fec: fix error in qemu_send_packet argument mcf_fec: fix error in qemu_send_packet argument net: mcf: limit buffer descriptor count e1000e: Fix EIAC register implementation e1000e: Fix spurious RX TCP ACK interrupts e1000e: Fix OTHER interrupts processing for MSI-X e1000e: Fix PBACLR implementation e1000e: Fix CTRL_EXT.EIAME behavior e1000e: Flush receive queues on link up e1000e: Flush all receive queues on receive enable net: limit allocation in nc_sendv_compat tap: Allow specifying a bridge e1000: fix buliding complaint docs: Add documentation for COLO-proxy MAINTAINERS: add maintainer for COLO-proxy filter-rewriter: rewrite tcp packet to keep secondary connection filter-rewriter: track connection and parse packet filter-rewriter: introduce filter-rewriter initialization colo-compare: add TCP, UDP, ICMP packet comparison colo-compare: introduce packet comparison thread ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
		
						commit
						333ec4ca6a
					
				| @ -1364,6 +1364,15 @@ F: util/uuid.c | |||||||
| F: include/qemu/uuid.h | F: include/qemu/uuid.h | ||||||
| F: tests/test-uuid.c | F: tests/test-uuid.c | ||||||
| 
 | 
 | ||||||
|  | COLO Proxy | ||||||
|  | M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||||
|  | M: Li Zhijian <lizhijian@cn.fujitsu.com> | ||||||
|  | S: Supported | ||||||
|  | F: docs/colo-proxy.txt | ||||||
|  | F: net/colo* | ||||||
|  | F: net/filter-rewriter.c | ||||||
|  | F: net/filter-mirror.c | ||||||
|  | 
 | ||||||
| Usermode Emulation | Usermode Emulation | ||||||
| ------------------ | ------------------ | ||||||
| Overall | Overall | ||||||
|  | |||||||
							
								
								
									
										188
									
								
								docs/colo-proxy.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								docs/colo-proxy.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,188 @@ | |||||||
|  | COLO-proxy | ||||||
|  | ---------- | ||||||
|  | Copyright (c) 2016 Intel Corporation | ||||||
|  | Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||||
|  | Copyright (c) 2016 Fujitsu, Corp. | ||||||
|  | 
 | ||||||
|  | This work is licensed under the terms of the GNU GPL, version 2 or later. | ||||||
|  | See the COPYING file in the top-level directory. | ||||||
|  | 
 | ||||||
|  | This document gives an overview of COLO proxy's design. | ||||||
|  | 
 | ||||||
|  | == Background == | ||||||
|  | COLO-proxy is a part of the COLO project. It is used | ||||||
|  | to compare network packets to help COLO decide | ||||||
|  | whether to do a checkpoint. With COLO-proxy's help, | ||||||
|  | COLO greatly improves performance. | ||||||
|  | 
 | ||||||
|  | The filter-redirector, filter-mirror, colo-compare | ||||||
|  | and filter-rewriter compose the COLO-proxy. | ||||||
|  | 
 | ||||||
|  | == Architecture == | ||||||
|  | 
 | ||||||
|  | COLO-Proxy is based on qemu netfilter and it's a plugin for qemu netfilter | ||||||
|  | (except colo-compare). It keeps the Secondary VM (SVM) connected normally to | ||||||
|  | the client and compares packets sent by the Primary VM (PVM) with those sent | ||||||
|  | by the SVM. If the packets differ, it notifies COLO-frame to do a checkpoint | ||||||
|  | and sends all queued primary packets. Otherwise it just sends the queued | ||||||
|  | primary packets and drops the queued secondary packets. | ||||||
|  | 
 | ||||||
|  | Below is a COLO proxy ascii figure: | ||||||
|  | 
 | ||||||
|  |  Primary qemu                                                           Secondary qemu | ||||||
|  | +--------------------------------------------------------------+       +----------------------------------------------------------------+ | ||||||
|  | | +----------------------------------------------------------+ |       |  +-----------------------------------------------------------+ | | ||||||
|  | | |                                                          | |       |  |                                                           | | | ||||||
|  | | |                        guest                             | |       |  |                        guest                              | | | ||||||
|  | | |                                                          | |       |  |                                                           | | | ||||||
|  | | +-------^--------------------------+-----------------------+ |       |  +---------------------+--------+----------------------------+ | | ||||||
|  | |         |                          |                         |       |                        ^        |                              | | ||||||
|  | |         |                          |                         |       |                        |        |                              | | ||||||
|  | |         |  +------------------------------------------------------+  |                        |        |                              | | ||||||
|  | |netfilter|  |                       |                         |    |  |   netfilter            |        |                              | | ||||||
|  | | +----------+ +----------------------------+                  |    |  |  +-----------------------------------------------------------+ | | ||||||
|  | | |       |  |                       |      |        out       |    |  |  |                     |        |  filter execute order      | | | ||||||
|  | | |       |  |          +-----------------------------+        |    |  |  |                     |        | +------------------->      | | | ||||||
|  | | |       |  |          |            |      |         |        |    |  |  |                     |        |   TCP                      | | | ||||||
|  | | | +-----+--+-+  +-----v----+ +-----v----+ |pri +----+----+sec|    |  |  | +------------+  +---+----+---v+rewriter++  +------------+ | | | ||||||
|  | | | |          |  |          | |          | |in  |         |in |    |  |  | |            |  |        |              |  |            | | | | ||||||
|  | | | |  filter  |  |  filter  | |  filter  +------>  colo   <------+ +-------->  filter   +--> adjust |   adjust     +-->   filter   | | | | ||||||
|  | | | |  mirror  |  |redirector| |redirector| |    | compare |   |  |    |  | | redirector |  | ack    |   seq        |  | redirector | | | | ||||||
|  | | | |          |  |          | |          | |    |         |   |  |    |  | |            |  |        |              |  |            | | | | ||||||
|  | | | +----^-----+  +----+-----+ +----------+ |    +---------+   |  |    |  | +------------+  +--------+--------------+  +---+--------+ | | | ||||||
|  | | |      |   tx        |   rx           rx  |                  |  |    |  |            tx                        all       |  rx      | | | ||||||
|  | | |      |             |                    |                  |  |    |  +-----------------------------------------------------------+ | | ||||||
|  | | |      |             +--------------+     |                  |  |    |                                                   |            | | ||||||
|  | | |      |   filter execute order     |     |                  |  |    |                                                   |            | | ||||||
|  | | |      |  +---------------->        |     |                  |  +--------------------------------------------------------+            | | ||||||
|  | | +-----------------------------------------+                  |       |                                                                | | ||||||
|  | |        |                            |                        |       |                                                                | | ||||||
|  | +--------------------------------------------------------------+       +----------------------------------------------------------------+ | ||||||
|  |          |guest receive               | guest send | ||||||
|  |          |                            | | ||||||
|  | +--------+----------------------------v------------------------+ | ||||||
|  | |                                                              |                          NOTE: filter direction is rx/tx/all | ||||||
|  | |                         tap                                  |                          rx:receive packets sent to the netdev | ||||||
|  | |                                                              |                          tx:receive packets sent by the netdev | ||||||
|  | +--------------------------------------------------------------+ | ||||||
|  | 
 | ||||||
|  | 1.Guest receive packet route: | ||||||
|  | 
 | ||||||
|  | Primary: | ||||||
|  | 
 | ||||||
|  | Tap --> Mirror Client Filter | ||||||
|  | The mirror client sends the packet to the guest and, at the | ||||||
|  | same time, copies and forwards the packet to the secondary | ||||||
|  | mirror server. | ||||||
|  | 
 | ||||||
|  | Secondary: | ||||||
|  | 
 | ||||||
|  | Mirror Server Filter --> TCP Rewriter | ||||||
|  | If the received packet is a TCP packet, we adjust the ack | ||||||
|  | and update the TCP checksum, then send it to the secondary | ||||||
|  | guest. Otherwise we send it directly to the guest. | ||||||
|  | 
 | ||||||
|  | 2.Guest send packet route: | ||||||
|  | 
 | ||||||
|  | Primary: | ||||||
|  | 
 | ||||||
|  | Guest --> Redirect Server Filter | ||||||
|  | The redirect server filter receives the primary guest packet | ||||||
|  | but does nothing with it; it just passes it to the next filter. | ||||||
|  | 
 | ||||||
|  | Redirect Server Filter --> COLO-Compare | ||||||
|  | COLO-compare receives the primary guest packet and then | ||||||
|  | waits for the secondary redirected packet to compare it with. | ||||||
|  | If the packets are the same, it sends the queued primary packet | ||||||
|  | and clears the queued secondary packet. Otherwise it sends the | ||||||
|  | primary packet and does a checkpoint. | ||||||
|  | 
 | ||||||
|  | COLO-Compare --> Another Redirector Filter | ||||||
|  | The redirector gets the packet from colo-compare through a | ||||||
|  | chardev socket. | ||||||
|  | 
 | ||||||
|  | Redirector Filter --> Tap | ||||||
|  | Send the packet. | ||||||
|  | 
 | ||||||
|  | Secondary: | ||||||
|  | 
 | ||||||
|  | Guest --> TCP Rewriter Filter | ||||||
|  | If the packet is a TCP packet, we adjust the seq | ||||||
|  | and update the TCP checksum, then send it to the | ||||||
|  | redirect client filter. Otherwise we send it directly to the | ||||||
|  | redirect client filter. | ||||||
|  | 
 | ||||||
|  | Redirect Client Filter --> Redirect Server Filter | ||||||
|  | Forward packet to primary. | ||||||
|  | 
 | ||||||
|  | == Components introduction == | ||||||
|  | 
 | ||||||
|  | Filter-mirror is a netfilter plugin. | ||||||
|  | It gives qemu the ability to mirror | ||||||
|  | packets to a chardev. | ||||||
|  | 
 | ||||||
|  | Filter-redirector is a netfilter plugin. | ||||||
|  | It gives qemu the ability to redirect net packets. | ||||||
|  | The redirector can redirect the filter's net packets to outdev, | ||||||
|  | and redirect indev's packets to the filter. | ||||||
|  | 
 | ||||||
|  |                     filter | ||||||
|  |                       + | ||||||
|  |           redirector  | | ||||||
|  |              +--------------+ | ||||||
|  |              |        |     | | ||||||
|  |              |        |     | | ||||||
|  |              |        |     | | ||||||
|  |   indev +---------+   +---------->  outdev | ||||||
|  |              |    |         | | ||||||
|  |              |    |         | | ||||||
|  |              |    |         | | ||||||
|  |              +--------------+ | ||||||
|  |                   | | ||||||
|  |                   v | ||||||
|  |                 filter | ||||||
|  | 
 | ||||||
|  | COLO-compare does the packet comparing job. | ||||||
|  | Packets coming from the primary char indev will be sent to outdev. | ||||||
|  | Packets coming from the secondary char dev will be dropped after comparing. | ||||||
|  | COLO-compare needs two input chardevs and one output chardev: | ||||||
|  | primary_in=chardev1-id (source: primary send packet) | ||||||
|  | secondary_in=chardev2-id (source: secondary send packet) | ||||||
|  | outdev=chardev3-id | ||||||
|  | 
 | ||||||
|  | Filter-rewriter rewrites some of the secondary packets so that the | ||||||
|  | secondary guest's TCP connection can be established successfully. | ||||||
|  | In this module we rewrite the ack of TCP packets going to the secondary | ||||||
|  | from the primary, and rewrite the seq of TCP packets going to the primary | ||||||
|  | from the secondary. | ||||||
|  | 
 | ||||||
|  | == Usage == | ||||||
|  | 
 | ||||||
|  | Here, we use a demo IP and port to describe the setup more clearly. | ||||||
|  | Primary(ip:3.3.3.3): | ||||||
|  | -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||||
|  | -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 | ||||||
|  | -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait | ||||||
|  | -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait | ||||||
|  | -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait | ||||||
|  | -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 | ||||||
|  | -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait | ||||||
|  | -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 | ||||||
|  | -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 | ||||||
|  | -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out | ||||||
|  | -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 | ||||||
|  | -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 | ||||||
|  | 
 | ||||||
|  | Secondary(ip:3.3.3.8): | ||||||
|  | -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||||
|  | -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 | ||||||
|  | -chardev socket,id=red0,host=3.3.3.3,port=9003 | ||||||
|  | -chardev socket,id=red1,host=3.3.3.3,port=9004 | ||||||
|  | -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||||
|  | -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||||
|  | 
 | ||||||
|  | Note: | ||||||
|  |   a.COLO-proxy must work with COLO-frame and Block-replication. | ||||||
|  |   b.Primary COLO must be started first, because COLO-proxy needs the | ||||||
|  |     chardev socket server running before the secondary is started. | ||||||
|  |   c.Filter-rewriter only rewrites TCP packets. | ||||||
| @ -400,7 +400,7 @@ static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, | |||||||
| 
 | 
 | ||||||
|     if (range_covers_byte(address, len, PCI_COMMAND) && |     if (range_covers_byte(address, len, PCI_COMMAND) && | ||||||
|         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { |         (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { | ||||||
|         qemu_flush_queued_packets(qemu_get_queue(s->nic)); |         e1000e_start_recv(&s->core); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -953,7 +953,7 @@ e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, | |||||||
|                          core->rx_desc_buf_size; |                          core->rx_desc_buf_size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void | void | ||||||
| e1000e_start_recv(E1000ECore *core) | e1000e_start_recv(E1000ECore *core) | ||||||
| { | { | ||||||
|     int i; |     int i; | ||||||
| @ -1710,7 +1710,8 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         /* Perform ACK receive detection */ |         /* Perform ACK receive detection */ | ||||||
|         if (e1000e_is_tcp_ack(core, core->rx_pkt)) { |         if  (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) && | ||||||
|  |              (e1000e_is_tcp_ack(core, core->rx_pkt))) { | ||||||
|             n |= E1000_ICS_ACK; |             n |= E1000_ICS_ACK; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
| @ -1807,6 +1808,7 @@ e1000e_core_set_link_status(E1000ECore *core) | |||||||
|                                    core->autoneg_timer); |                                    core->autoneg_timer); | ||||||
|         } else { |         } else { | ||||||
|             e1000x_update_regs_on_link_up(core->mac, core->phy[0]); |             e1000x_update_regs_on_link_up(core->mac, core->phy[0]); | ||||||
|  |             e1000e_start_recv(core); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -2007,19 +2009,23 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { |     if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { | ||||||
|         trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); |         trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause); | ||||||
|         e1000e_clear_ims_bits(core, core->mac[IAM] & cause); |         core->mac[IAM] &= ~cause; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); |     trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); | ||||||
| 
 | 
 | ||||||
|     if (core->mac[EIAC] & E1000_ICR_OTHER) { |     effective_eiac = core->mac[EIAC] & cause; | ||||||
|         effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | | 
 | ||||||
|                          E1000_ICR_OTHER_CAUSES; |     if (effective_eiac == E1000_ICR_OTHER) { | ||||||
|     } else { |         effective_eiac |= E1000_ICR_OTHER_CAUSES; | ||||||
|         effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; |  | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     core->mac[ICR] &= ~effective_eiac; |     core->mac[ICR] &= ~effective_eiac; | ||||||
|  | 
 | ||||||
|  |     if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { | ||||||
|  |         core->mac[IMS] &= ~effective_eiac; | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| @ -2130,7 +2136,7 @@ e1000e_update_interrupt_state(E1000ECore *core) | |||||||
| 
 | 
 | ||||||
|     /* Set ICR[OTHER] for MSI-X */ |     /* Set ICR[OTHER] for MSI-X */ | ||||||
|     if (is_msix) { |     if (is_msix) { | ||||||
|         if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { |         if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) { | ||||||
|             core->mac[ICR] |= E1000_ICR_OTHER; |             core->mac[ICR] |= E1000_ICR_OTHER; | ||||||
|             trace_e1000e_irq_add_msi_other(core->mac[ICR]); |             trace_e1000e_irq_add_msi_other(core->mac[ICR]); | ||||||
|         } |         } | ||||||
| @ -2168,7 +2174,7 @@ e1000e_update_interrupt_state(E1000ECore *core) | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void | static void | ||||||
| e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) | e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) | ||||||
| { | { | ||||||
|     trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); |     trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); | ||||||
| @ -2187,6 +2193,8 @@ e1000e_autoneg_timer(void *opaque) | |||||||
|     E1000ECore *core = opaque; |     E1000ECore *core = opaque; | ||||||
|     if (!qemu_get_queue(core->owner_nic)->link_down) { |     if (!qemu_get_queue(core->owner_nic)->link_down) { | ||||||
|         e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); |         e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); | ||||||
|  |         e1000e_start_recv(core); | ||||||
|  | 
 | ||||||
|         e1000e_update_flowctl_status(core); |         e1000e_update_flowctl_status(core); | ||||||
|         /* signal link status change to the guest */ |         /* signal link status change to the guest */ | ||||||
|         e1000e_set_interrupt_cause(core, E1000_ICR_LSC); |         e1000e_set_interrupt_cause(core, E1000_ICR_LSC); | ||||||
| @ -2344,7 +2352,7 @@ e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) | |||||||
| 
 | 
 | ||||||
|     core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; |     core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; | ||||||
| 
 | 
 | ||||||
|     if (msix_enabled(core->owner)) { |     if (!msix_enabled(core->owner)) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -144,3 +144,6 @@ e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); | |||||||
| 
 | 
 | ||||||
| ssize_t | ssize_t | ||||||
| e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); | e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | e1000e_start_recv(E1000ECore *core); | ||||||
|  | |||||||
| @ -429,7 +429,7 @@ static void imx_fec_do_tx(IMXFECState *s) | |||||||
|         frame_size += len; |         frame_size += len; | ||||||
|         if (bd.flags & ENET_BD_L) { |         if (bd.flags & ENET_BD_L) { | ||||||
|             /* Last buffer in frame.  */ |             /* Last buffer in frame.  */ | ||||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, len); |             qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); | ||||||
|             ptr = frame; |             ptr = frame; | ||||||
|             frame_size = 0; |             frame_size = 0; | ||||||
|             s->regs[ENET_EIR] |= ENET_INT_TXF; |             s->regs[ENET_EIR] |= ENET_INT_TXF; | ||||||
|  | |||||||
| @ -23,6 +23,7 @@ do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0) | |||||||
| #define DPRINTF(fmt, ...) do {} while(0) | #define DPRINTF(fmt, ...) do {} while(0) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #define FEC_MAX_DESC 1024 | ||||||
| #define FEC_MAX_FRAME_SIZE 2032 | #define FEC_MAX_FRAME_SIZE 2032 | ||||||
| 
 | 
 | ||||||
| typedef struct { | typedef struct { | ||||||
| @ -149,7 +150,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||||||
|     uint32_t addr; |     uint32_t addr; | ||||||
|     mcf_fec_bd bd; |     mcf_fec_bd bd; | ||||||
|     int frame_size; |     int frame_size; | ||||||
|     int len; |     int len, descnt = 0; | ||||||
|     uint8_t frame[FEC_MAX_FRAME_SIZE]; |     uint8_t frame[FEC_MAX_FRAME_SIZE]; | ||||||
|     uint8_t *ptr; |     uint8_t *ptr; | ||||||
| 
 | 
 | ||||||
| @ -157,7 +158,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||||||
|     ptr = frame; |     ptr = frame; | ||||||
|     frame_size = 0; |     frame_size = 0; | ||||||
|     addr = s->tx_descriptor; |     addr = s->tx_descriptor; | ||||||
|     while (1) { |     while (descnt++ < FEC_MAX_DESC) { | ||||||
|         mcf_fec_read_bd(&bd, addr); |         mcf_fec_read_bd(&bd, addr); | ||||||
|         DPRINTF("tx_bd %x flags %04x len %d data %08x\n", |         DPRINTF("tx_bd %x flags %04x len %d data %08x\n", | ||||||
|                 addr, bd.flags, bd.length, bd.data); |                 addr, bd.flags, bd.length, bd.data); | ||||||
| @ -176,7 +177,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) | |||||||
|         if (bd.flags & FEC_BD_L) { |         if (bd.flags & FEC_BD_L) { | ||||||
|             /* Last buffer in frame.  */ |             /* Last buffer in frame.  */ | ||||||
|             DPRINTF("Sending packet\n"); |             DPRINTF("Sending packet\n"); | ||||||
|             qemu_send_packet(qemu_get_queue(s->nic), frame, len); |             qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); | ||||||
|             ptr = frame; |             ptr = frame; | ||||||
|             frame_size = 0; |             frame_size = 0; | ||||||
|             s->eir |= FEC_INT_TXF; |             s->eir |= FEC_INT_TXF; | ||||||
|  | |||||||
| @ -223,7 +223,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" | |||||||
| e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" | e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" | ||||||
| e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" | e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" | ||||||
| e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" | e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" | ||||||
| e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" | e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" | ||||||
| e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X" | e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X" | ||||||
| e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x" | e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x" | ||||||
| e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts" | e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts" | ||||||
|  | |||||||
| @ -31,6 +31,11 @@ | |||||||
| #define MAC_TABLE_ENTRIES    64 | #define MAC_TABLE_ENTRIES    64 | ||||||
| #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */ | #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */ | ||||||
| 
 | 
 | ||||||
|  | /* previously fixed value */ | ||||||
|  | #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 | ||||||
|  | /* for now, only allow larger queues; with virtio-1, guest can downsize */ | ||||||
|  | #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Calculate the number of bytes up to and including the given 'field' of |  * Calculate the number of bytes up to and including the given 'field' of | ||||||
|  * 'container'. |  * 'container'. | ||||||
| @ -1412,7 +1417,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) | |||||||
| { | { | ||||||
|     VirtIODevice *vdev = VIRTIO_DEVICE(n); |     VirtIODevice *vdev = VIRTIO_DEVICE(n); | ||||||
| 
 | 
 | ||||||
|     n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); |     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, | ||||||
|  |                                            virtio_net_handle_rx); | ||||||
|     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { |     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { | ||||||
|         n->vqs[index].tx_vq = |         n->vqs[index].tx_vq = | ||||||
|             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); |             virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); | ||||||
| @ -1720,6 +1726,22 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) | |||||||
|     virtio_net_set_config_size(n, n->host_features); |     virtio_net_set_config_size(n, n->host_features); | ||||||
|     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); |     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); | ||||||
| 
 | 
 | ||||||
|  |     /*
 | ||||||
|  |      * We set a lower limit on RX queue size to what it always was. | ||||||
|  |      * Guests that want a smaller ring can always resize it without | ||||||
|  |      * help from us (using virtio 1 and up). | ||||||
|  |      */ | ||||||
|  |     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || | ||||||
|  |         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || | ||||||
|  |         (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) { | ||||||
|  |         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " | ||||||
|  |                    "must be a power of 2 between %d and %d.", | ||||||
|  |                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, | ||||||
|  |                    VIRTQUEUE_MAX_SIZE); | ||||||
|  |         virtio_cleanup(vdev); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     n->max_queues = MAX(n->nic_conf.peers.queues, 1); |     n->max_queues = MAX(n->nic_conf.peers.queues, 1); | ||||||
|     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { |     if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { | ||||||
|         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " |         error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " | ||||||
| @ -1880,6 +1902,8 @@ static Property virtio_net_properties[] = { | |||||||
|                        TX_TIMER_INTERVAL), |                        TX_TIMER_INTERVAL), | ||||||
|     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), |     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), | ||||||
|     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), |     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), | ||||||
|  |     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, | ||||||
|  |                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), | ||||||
|     DEFINE_PROP_END_OF_LIST(), |     DEFINE_PROP_END_OF_LIST(), | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -35,6 +35,7 @@ typedef struct virtio_net_conf | |||||||
|     uint32_t txtimer; |     uint32_t txtimer; | ||||||
|     int32_t txburst; |     int32_t txburst; | ||||||
|     char *tx; |     char *tx; | ||||||
|  |     uint16_t rx_queue_size; | ||||||
| } virtio_net_conf; | } virtio_net_conf; | ||||||
| 
 | 
 | ||||||
| /* Maximum packet size we can receive from tap device: header + 64k */ | /* Maximum packet size we can receive from tap device: header + 64k */ | ||||||
|  | |||||||
							
								
								
									
										59
									
								
								include/qemu/jhash.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								include/qemu/jhash.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,59 @@ | |||||||
|  | /* jhash.h: Jenkins hash support.
 | ||||||
|  |   * | ||||||
|  |   * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) | ||||||
|  |   * | ||||||
|  |   * http://burtleburtle.net/bob/hash/
 | ||||||
|  |   * | ||||||
|  |   * These are the credits from Bob's sources: | ||||||
|  |   * | ||||||
|  |   * lookup3.c, by Bob Jenkins, May 2006, Public Domain. | ||||||
|  |   * | ||||||
|  |   * These are functions for producing 32-bit hashes for hash table lookup. | ||||||
|  |   * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() | ||||||
|  |   * are externally useful functions.  Routines to test the hash are included | ||||||
|  |   * if SELF_TEST is defined.  You can use this free for any purpose. It's in | ||||||
|  |   * the public domain.  It has no warranty. | ||||||
|  |   * | ||||||
|  |   * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) | ||||||
|  |   * | ||||||
|  |   * I've modified Bob's hash to be useful in the Linux kernel, and | ||||||
|  |   * any bugs present are my fault. | ||||||
|  |   * Jozsef | ||||||
|  |   */ | ||||||
|  | 
 | ||||||
|  | #ifndef QEMU_JHASH_H__ | ||||||
|  | #define QEMU_JHASH_H__ | ||||||
|  | 
 | ||||||
|  | #include "qemu/bitops.h" | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Hash helpers copied from the Linux kernel's jhash implementation. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
/*
 * __jhash_mix - reversibly mix three 32-bit values.
 *
 * Expands to a brace-enclosed block that updates a, b and c in place.
 * Every argument is evaluated several times, so pass plain lvalues
 * without side effects.
 */
#define __jhash_mix(a, b, c)    \
{                               \
    a -= c;                     \
    a ^= rol32(c, 4);           \
    c += b;                     \
    b -= a;                     \
    b ^= rol32(a, 6);           \
    a += c;                     \
    c -= b;                     \
    c ^= rol32(b, 8);           \
    b += a;                     \
    a -= c;                     \
    a ^= rol32(c, 16);          \
    c += b;                     \
    b -= a;                     \
    b ^= rol32(a, 19);          \
    a += c;                     \
    c -= b;                     \
    c ^= rol32(b, 4);           \
    b += a;                     \
}
|  | 
 | ||||||
/*
 * __jhash_final - final mixing of three 32-bit values (a, b, c) into c.
 *
 * Expands to a brace-enclosed block that updates a, b and c in place.
 * Every argument is evaluated several times, so pass plain lvalues
 * without side effects.
 */
#define __jhash_final(a, b, c)  \
{                               \
    c ^= b;                     \
    c -= rol32(b, 14);          \
    a ^= c;                     \
    a -= rol32(c, 11);          \
    b ^= a;                     \
    b -= rol32(a, 25);          \
    c ^= b;                     \
    c -= rol32(b, 16);          \
    a ^= c;                     \
    a -= rol32(c, 4);           \
    b ^= a;                     \
    b -= rol32(a, 14);          \
    c ^= b;                     \
    c -= rol32(b, 24);          \
}
|  | 
 | ||||||
|  | /* An arbitrary initial parameter */ | ||||||
|  | #define JHASH_INITVAL           0xdeadbeef | ||||||
|  | 
 | ||||||
|  | #endif /* QEMU_JHASH_H__ */ | ||||||
| @ -65,7 +65,8 @@ struct CharDriverState { | |||||||
|     int (*chr_sync_read)(struct CharDriverState *s, |     int (*chr_sync_read)(struct CharDriverState *s, | ||||||
|                          const uint8_t *buf, int len); |                          const uint8_t *buf, int len); | ||||||
|     GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); |     GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); | ||||||
|     void (*chr_update_read_handler)(struct CharDriverState *s); |     void (*chr_update_read_handler)(struct CharDriverState *s, | ||||||
|  |                                     GMainContext *context); | ||||||
|     int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); |     int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); | ||||||
|     int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); |     int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); | ||||||
|     int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); |     int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); | ||||||
| @ -422,6 +423,14 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||||||
|                            IOEventHandler *fd_event, |                            IOEventHandler *fd_event, | ||||||
|                            void *opaque); |                            void *opaque); | ||||||
| 
 | 
 | ||||||
|  | /* This API makes the handlers run in the GMainContext you pass in. */ | ||||||
|  | void qemu_chr_add_handlers_full(CharDriverState *s, | ||||||
|  |                                 IOCanReadHandler *fd_can_read, | ||||||
|  |                                 IOReadHandler *fd_read, | ||||||
|  |                                 IOEventHandler *fd_event, | ||||||
|  |                                 void *opaque, | ||||||
|  |                                 GMainContext *context); | ||||||
|  | 
 | ||||||
| void qemu_chr_be_generic_open(CharDriverState *s); | void qemu_chr_be_generic_open(CharDriverState *s); | ||||||
| void qemu_chr_accept_input(CharDriverState *s); | void qemu_chr_accept_input(CharDriverState *s); | ||||||
| int qemu_chr_add_client(CharDriverState *s, int fd); | int qemu_chr_add_client(CharDriverState *s, int fd); | ||||||
|  | |||||||
| @ -16,3 +16,6 @@ common-obj-$(CONFIG_NETMAP) += netmap.o | |||||||
| common-obj-y += filter.o | common-obj-y += filter.o | ||||||
| common-obj-y += filter-buffer.o | common-obj-y += filter-buffer.o | ||||||
| common-obj-y += filter-mirror.o | common-obj-y += filter-mirror.o | ||||||
|  | common-obj-y += colo-compare.o | ||||||
|  | common-obj-y += colo.o | ||||||
|  | common-obj-y += filter-rewriter.o | ||||||
|  | |||||||
							
								
								
									
										781
									
								
								net/colo-compare.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										781
									
								
								net/colo-compare.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,781 @@ | |||||||
|  | /*
 | ||||||
|  |  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||||
|  |  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||||
|  |  * Copyright (c) 2016 FUJITSU LIMITED | ||||||
|  |  * Copyright (c) 2016 Intel Corporation | ||||||
|  |  * | ||||||
|  |  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||||
|  |  * | ||||||
|  |  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||||
|  |  * later.  See the COPYING file in the top-level directory. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "qemu/osdep.h" | ||||||
|  | #include "qemu/error-report.h" | ||||||
|  | #include "trace.h" | ||||||
|  | #include "qemu-common.h" | ||||||
|  | #include "qapi/qmp/qerror.h" | ||||||
|  | #include "qapi/error.h" | ||||||
|  | #include "net/net.h" | ||||||
|  | #include "net/eth.h" | ||||||
|  | #include "qom/object_interfaces.h" | ||||||
|  | #include "qemu/iov.h" | ||||||
|  | #include "qom/object.h" | ||||||
|  | #include "qemu/typedefs.h" | ||||||
|  | #include "net/queue.h" | ||||||
|  | #include "sysemu/char.h" | ||||||
|  | #include "qemu/sockets.h" | ||||||
|  | #include "qapi-visit.h" | ||||||
|  | #include "net/colo.h" | ||||||
|  | 
 | ||||||
|  | #define TYPE_COLO_COMPARE "colo-compare" | ||||||
|  | #define COLO_COMPARE(obj) \ | ||||||
|  |     OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) | ||||||
|  | 
 | ||||||
|  | #define COMPARE_READ_LEN_MAX NET_BUFSIZE | ||||||
|  | #define MAX_QUEUE_SIZE 1024 | ||||||
|  | 
 | ||||||
|  | /* TODO: Should be configurable */ | ||||||
|  | #define REGULAR_PACKET_CHECK_MS 3000 | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |   + CompareState ++ | ||||||
|  |   |               | | ||||||
|  |   +---------------+   +---------------+         +---------------+ | ||||||
|  |   |conn list      +--->conn           +--------->conn           | | ||||||
|  |   +---------------+   +---------------+         +---------------+ | ||||||
|  |   |               |     |           |             |          | | ||||||
|  |   +---------------+ +---v----+  +---v----+    +---v----+ +---v----+ | ||||||
|  |                     |primary |  |secondary    |primary | |secondary | ||||||
|  |                     |packet  |  |packet  +    |packet  | |packet  + | ||||||
|  |                     +--------+  +--------+    +--------+ +--------+ | ||||||
|  |                         |           |             |          | | ||||||
|  |                     +---v----+  +---v----+    +---v----+ +---v----+ | ||||||
|  |                     |primary |  |secondary    |primary | |secondary | ||||||
|  |                     |packet  |  |packet  +    |packet  | |packet  + | ||||||
|  |                     +--------+  +--------+    +--------+ +--------+ | ||||||
|  |                         |           |             |          | | ||||||
|  |                     +---v----+  +---v----+    +---v----+ +---v----+ | ||||||
|  |                     |primary |  |secondary    |primary | |secondary | ||||||
|  |                     |packet  |  |packet  +    |packet  | |packet  + | ||||||
|  |                     +--------+  +--------+    +--------+ +--------+ | ||||||
|  | */ | ||||||
|  | typedef struct CompareState { | ||||||
|  |     Object parent; | ||||||
|  | 
 | ||||||
|  |     char *pri_indev; | ||||||
|  |     char *sec_indev; | ||||||
|  |     char *outdev; | ||||||
|  |     CharDriverState *chr_pri_in; | ||||||
|  |     CharDriverState *chr_sec_in; | ||||||
|  |     CharDriverState *chr_out; | ||||||
|  |     SocketReadState pri_rs; | ||||||
|  |     SocketReadState sec_rs; | ||||||
|  | 
 | ||||||
|  |     /* connection list: the connections belonged to this NIC could be found
 | ||||||
|  |      * in this list. | ||||||
|  |      * element type: Connection | ||||||
|  |      */ | ||||||
|  |     GQueue conn_list; | ||||||
|  |     /* hashtable to save connection */ | ||||||
|  |     GHashTable *connection_track_table; | ||||||
|  |     /* compare thread, a thread for each NIC */ | ||||||
|  |     QemuThread thread; | ||||||
|  |     /* Timer used on the primary to find packets that are never matched */ | ||||||
|  |     QEMUTimer *timer; | ||||||
|  |     QemuMutex timer_check_lock; | ||||||
|  | } CompareState; | ||||||
|  | 
 | ||||||
|  | typedef struct CompareClass { | ||||||
|  |     ObjectClass parent_class; | ||||||
|  | } CompareClass; | ||||||
|  | 
 | ||||||
/* Result of scanning a chardev's options with compare_chardev_opts(). */
typedef struct CompareChardevProps {
    /* Set when the option list contains backend=socket */
    bool is_socket;
} CompareChardevProps;
|  | 
 | ||||||
/* Selects which read state packet_enqueue() takes the packet from. */
enum {
    PRIMARY_IN = 0,
    SECONDARY_IN = 1,
};
|  | 
 | ||||||
|  | static int compare_chr_send(CharDriverState *out, | ||||||
|  |                             const uint8_t *buf, | ||||||
|  |                             uint32_t size); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Return 0 on success, if return -1 means the pkt | ||||||
|  |  * is unsupported(arp and ipv6) and will be sent later | ||||||
|  |  */ | ||||||
|  | static int packet_enqueue(CompareState *s, int mode) | ||||||
|  | { | ||||||
|  |     ConnectionKey key; | ||||||
|  |     Packet *pkt = NULL; | ||||||
|  |     Connection *conn; | ||||||
|  | 
 | ||||||
|  |     if (mode == PRIMARY_IN) { | ||||||
|  |         pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len); | ||||||
|  |     } else { | ||||||
|  |         pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (parse_packet_early(pkt)) { | ||||||
|  |         packet_destroy(pkt, NULL); | ||||||
|  |         pkt = NULL; | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  |     fill_connection_key(pkt, &key); | ||||||
|  | 
 | ||||||
|  |     conn = connection_get(s->connection_track_table, | ||||||
|  |                           &key, | ||||||
|  |                           &s->conn_list); | ||||||
|  | 
 | ||||||
|  |     if (!conn->processing) { | ||||||
|  |         g_queue_push_tail(&s->conn_list, conn); | ||||||
|  |         conn->processing = true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (mode == PRIMARY_IN) { | ||||||
|  |         if (g_queue_get_length(&conn->primary_list) <= | ||||||
|  |                                MAX_QUEUE_SIZE) { | ||||||
|  |             g_queue_push_tail(&conn->primary_list, pkt); | ||||||
|  |         } else { | ||||||
|  |             error_report("colo compare primary queue size too big," | ||||||
|  |                          "drop packet"); | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         if (g_queue_get_length(&conn->secondary_list) <= | ||||||
|  |                                MAX_QUEUE_SIZE) { | ||||||
|  |             g_queue_push_tail(&conn->secondary_list, pkt); | ||||||
|  |         } else { | ||||||
|  |             error_report("colo compare secondary queue size too big," | ||||||
|  |                          "drop packet"); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * The IP packets sent by primary and secondary | ||||||
|  |  * will be compared in here | ||||||
|  |  * TODO support ip fragment, Out-Of-Order | ||||||
|  |  * return:    0  means packet same | ||||||
|  |  *            > 0 || < 0 means packet different | ||||||
|  |  */ | ||||||
|  | static int colo_packet_compare(Packet *ppkt, Packet *spkt) | ||||||
|  | { | ||||||
|  |     trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), | ||||||
|  |                                inet_ntoa(ppkt->ip->ip_dst), spkt->size, | ||||||
|  |                                inet_ntoa(spkt->ip->ip_src), | ||||||
|  |                                inet_ntoa(spkt->ip->ip_dst)); | ||||||
|  | 
 | ||||||
|  |     if (ppkt->size == spkt->size) { | ||||||
|  |         return memcmp(ppkt->data, spkt->data, spkt->size); | ||||||
|  |     } else { | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the compare thread on the primary | ||||||
|  |  * for compare tcp packet | ||||||
|  |  * compare_tcp copied from Dr. David Alan Gilbert's branch | ||||||
|  |  */ | ||||||
|  | static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | ||||||
|  | { | ||||||
|  |     struct tcphdr *ptcp, *stcp; | ||||||
|  |     int res; | ||||||
|  |     char *sdebug, *ddebug; | ||||||
|  | 
 | ||||||
|  |     trace_colo_compare_main("compare tcp"); | ||||||
|  |     if (ppkt->size != spkt->size) { | ||||||
|  |         if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||||||
|  |             trace_colo_compare_main("pkt size not same"); | ||||||
|  |         } | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ptcp = (struct tcphdr *)ppkt->transport_header; | ||||||
|  |     stcp = (struct tcphdr *)spkt->transport_header; | ||||||
|  | 
 | ||||||
|  |     /*
 | ||||||
|  |      * The 'identification' field in the IP header is *very* random | ||||||
|  |      * it almost never matches.  Fudge this by ignoring differences in | ||||||
|  |      * unfragmented packets; they'll normally sort themselves out if different | ||||||
|  |      * anyway, and it should recover at the TCP level. | ||||||
|  |      * An alternative would be to get both the primary and secondary to rewrite | ||||||
|  |      * somehow; but that would need some sync traffic to sync the state | ||||||
|  |      */ | ||||||
|  |     if (ntohs(ppkt->ip->ip_off) & IP_DF) { | ||||||
|  |         spkt->ip->ip_id = ppkt->ip->ip_id; | ||||||
|  |         /* and the sum will be different if the IDs were different */ | ||||||
|  |         spkt->ip->ip_sum = ppkt->ip->ip_sum; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN, | ||||||
|  |                 (spkt->size - ETH_HLEN)); | ||||||
|  | 
 | ||||||
|  |     if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||||||
|  |         sdebug = strdup(inet_ntoa(ppkt->ip->ip_src)); | ||||||
|  |         ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst)); | ||||||
|  |         fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u" | ||||||
|  |                 " s: seq/ack=%u/%u res=%d flags=%x/%x\n", | ||||||
|  |                 __func__, sdebug, ddebug, | ||||||
|  |                 (unsigned int)ntohl(ptcp->th_seq), | ||||||
|  |                 (unsigned int)ntohl(ptcp->th_ack), | ||||||
|  |                 (unsigned int)ntohl(stcp->th_seq), | ||||||
|  |                 (unsigned int)ntohl(stcp->th_ack), | ||||||
|  |                 res, ptcp->th_flags, stcp->th_flags); | ||||||
|  | 
 | ||||||
|  |         fprintf(stderr, "Primary len = %d\n", ppkt->size); | ||||||
|  |         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); | ||||||
|  |         fprintf(stderr, "Secondary len = %d\n", spkt->size); | ||||||
|  |         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); | ||||||
|  | 
 | ||||||
|  |         g_free(sdebug); | ||||||
|  |         g_free(ddebug); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return res; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the compare thread on the primary | ||||||
|  |  * for compare udp packet | ||||||
|  |  */ | ||||||
|  | static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | ||||||
|  | { | ||||||
|  |     int ret; | ||||||
|  | 
 | ||||||
|  |     trace_colo_compare_main("compare udp"); | ||||||
|  |     ret = colo_packet_compare(ppkt, spkt); | ||||||
|  | 
 | ||||||
|  |     if (ret) { | ||||||
|  |         trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size); | ||||||
|  |         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); | ||||||
|  |         trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size); | ||||||
|  |         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the compare thread on the primary | ||||||
|  |  * for compare icmp packet | ||||||
|  |  */ | ||||||
|  | static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) | ||||||
|  | { | ||||||
|  |     int network_length; | ||||||
|  | 
 | ||||||
|  |     trace_colo_compare_main("compare icmp"); | ||||||
|  |     network_length = ppkt->ip->ip_hl * 4; | ||||||
|  |     if (ppkt->size != spkt->size || | ||||||
|  |         ppkt->size < network_length + ETH_HLEN) { | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (colo_packet_compare(ppkt, spkt)) { | ||||||
|  |         trace_colo_compare_icmp_miscompare("primary pkt size", | ||||||
|  |                                            ppkt->size); | ||||||
|  |         qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", | ||||||
|  |                      ppkt->size); | ||||||
|  |         trace_colo_compare_icmp_miscompare("Secondary pkt size", | ||||||
|  |                                            spkt->size); | ||||||
|  |         qemu_hexdump((char *)spkt->data, stderr, "colo-compare", | ||||||
|  |                      spkt->size); | ||||||
|  |         return -1; | ||||||
|  |     } else { | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the compare thread on the primary | ||||||
|  |  * for compare other packet | ||||||
|  |  */ | ||||||
|  | static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) | ||||||
|  | { | ||||||
|  |     trace_colo_compare_main("compare other"); | ||||||
|  |     trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), | ||||||
|  |                                inet_ntoa(ppkt->ip->ip_dst), spkt->size, | ||||||
|  |                                inet_ntoa(spkt->ip->ip_src), | ||||||
|  |                                inet_ntoa(spkt->ip->ip_dst)); | ||||||
|  |     return colo_packet_compare(ppkt, spkt); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) | ||||||
|  | { | ||||||
|  |     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST); | ||||||
|  | 
 | ||||||
|  |     if ((now - pkt->creation_ms) > (*check_time)) { | ||||||
|  |         trace_colo_old_packet_check_found(pkt->creation_ms); | ||||||
|  |         return 0; | ||||||
|  |     } else { | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_old_packet_check_one_conn(void *opaque, | ||||||
|  |                                            void *user_data) | ||||||
|  | { | ||||||
|  |     Connection *conn = opaque; | ||||||
|  |     GList *result = NULL; | ||||||
|  |     int64_t check_time = REGULAR_PACKET_CHECK_MS; | ||||||
|  | 
 | ||||||
|  |     result = g_queue_find_custom(&conn->primary_list, | ||||||
|  |                                  &check_time, | ||||||
|  |                                  (GCompareFunc)colo_old_packet_check_one); | ||||||
|  | 
 | ||||||
|  |     if (result) { | ||||||
|  |         /* do checkpoint will flush old packet */ | ||||||
|  |         /* TODO: colo_notify_checkpoint();*/ | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Look for old packets that the secondary hasn't matched, | ||||||
|  |  * if we have some then we have to checkpoint to wake | ||||||
|  |  * the secondary up. | ||||||
|  |  */ | ||||||
|  | static void colo_old_packet_check(void *opaque) | ||||||
|  | { | ||||||
|  |     CompareState *s = opaque; | ||||||
|  | 
 | ||||||
|  |     g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the compare thread on the primary | ||||||
|  |  * for compare connection | ||||||
|  |  */ | ||||||
|  | static void colo_compare_connection(void *opaque, void *user_data) | ||||||
|  | { | ||||||
|  |     CompareState *s = user_data; | ||||||
|  |     Connection *conn = opaque; | ||||||
|  |     Packet *pkt = NULL; | ||||||
|  |     GList *result = NULL; | ||||||
|  |     int ret; | ||||||
|  | 
 | ||||||
|  |     while (!g_queue_is_empty(&conn->primary_list) && | ||||||
|  |            !g_queue_is_empty(&conn->secondary_list)) { | ||||||
|  |         qemu_mutex_lock(&s->timer_check_lock); | ||||||
|  |         pkt = g_queue_pop_tail(&conn->primary_list); | ||||||
|  |         qemu_mutex_unlock(&s->timer_check_lock); | ||||||
|  |         switch (conn->ip_proto) { | ||||||
|  |         case IPPROTO_TCP: | ||||||
|  |             result = g_queue_find_custom(&conn->secondary_list, | ||||||
|  |                      pkt, (GCompareFunc)colo_packet_compare_tcp); | ||||||
|  |             break; | ||||||
|  |         case IPPROTO_UDP: | ||||||
|  |             result = g_queue_find_custom(&conn->secondary_list, | ||||||
|  |                      pkt, (GCompareFunc)colo_packet_compare_udp); | ||||||
|  |             break; | ||||||
|  |         case IPPROTO_ICMP: | ||||||
|  |             result = g_queue_find_custom(&conn->secondary_list, | ||||||
|  |                      pkt, (GCompareFunc)colo_packet_compare_icmp); | ||||||
|  |             break; | ||||||
|  |         default: | ||||||
|  |             result = g_queue_find_custom(&conn->secondary_list, | ||||||
|  |                      pkt, (GCompareFunc)colo_packet_compare_other); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (result) { | ||||||
|  |             ret = compare_chr_send(s->chr_out, pkt->data, pkt->size); | ||||||
|  |             if (ret < 0) { | ||||||
|  |                 error_report("colo_send_primary_packet failed"); | ||||||
|  |             } | ||||||
|  |             trace_colo_compare_main("packet same and release packet"); | ||||||
|  |             g_queue_remove(&conn->secondary_list, result->data); | ||||||
|  |             packet_destroy(pkt, NULL); | ||||||
|  |         } else { | ||||||
|  |             /*
 | ||||||
|  |              * If one packet arrive late, the secondary_list or | ||||||
|  |              * primary_list will be empty, so we can't compare it | ||||||
|  |              * until next comparison. | ||||||
|  |              */ | ||||||
|  |             trace_colo_compare_main("packet different"); | ||||||
|  |             qemu_mutex_lock(&s->timer_check_lock); | ||||||
|  |             g_queue_push_tail(&conn->primary_list, pkt); | ||||||
|  |             qemu_mutex_unlock(&s->timer_check_lock); | ||||||
|  |             /* TODO: colo_notify_checkpoint();*/ | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int compare_chr_send(CharDriverState *out, | ||||||
|  |                             const uint8_t *buf, | ||||||
|  |                             uint32_t size) | ||||||
|  | { | ||||||
|  |     int ret = 0; | ||||||
|  |     uint32_t len = htonl(size); | ||||||
|  | 
 | ||||||
|  |     if (!size) { | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len)); | ||||||
|  |     if (ret != sizeof(len)) { | ||||||
|  |         goto err; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size); | ||||||
|  |     if (ret != size) { | ||||||
|  |         goto err; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | 
 | ||||||
|  | err: | ||||||
|  |     return ret < 0 ? ret : -EIO; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int compare_chr_can_read(void *opaque) | ||||||
|  | { | ||||||
|  |     return COMPARE_READ_LEN_MAX; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the main thread on the primary for packets | ||||||
|  |  * arriving over the socket from the primary. | ||||||
|  |  */ | ||||||
|  | static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(opaque); | ||||||
|  |     int ret; | ||||||
|  | 
 | ||||||
|  |     ret = net_fill_rstate(&s->pri_rs, buf, size); | ||||||
|  |     if (ret == -1) { | ||||||
|  |         qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL); | ||||||
|  |         error_report("colo-compare primary_in error"); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the main thread on the primary for packets | ||||||
|  |  * arriving over the socket from the secondary. | ||||||
|  |  */ | ||||||
|  | static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(opaque); | ||||||
|  |     int ret; | ||||||
|  | 
 | ||||||
|  |     ret = net_fill_rstate(&s->sec_rs, buf, size); | ||||||
|  |     if (ret == -1) { | ||||||
|  |         qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL); | ||||||
|  |         error_report("colo-compare secondary_in error"); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void *colo_compare_thread(void *opaque) | ||||||
|  | { | ||||||
|  |     GMainContext *worker_context; | ||||||
|  |     GMainLoop *compare_loop; | ||||||
|  |     CompareState *s = opaque; | ||||||
|  | 
 | ||||||
|  |     worker_context = g_main_context_new(); | ||||||
|  | 
 | ||||||
|  |     qemu_chr_add_handlers_full(s->chr_pri_in, compare_chr_can_read, | ||||||
|  |                           compare_pri_chr_in, NULL, s, worker_context); | ||||||
|  |     qemu_chr_add_handlers_full(s->chr_sec_in, compare_chr_can_read, | ||||||
|  |                           compare_sec_chr_in, NULL, s, worker_context); | ||||||
|  | 
 | ||||||
|  |     compare_loop = g_main_loop_new(worker_context, FALSE); | ||||||
|  | 
 | ||||||
|  |     g_main_loop_run(compare_loop); | ||||||
|  | 
 | ||||||
|  |     g_main_loop_unref(compare_loop); | ||||||
|  |     g_main_context_unref(worker_context); | ||||||
|  |     return NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static char *compare_get_pri_indev(Object *obj, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     return g_strdup(s->pri_indev); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void compare_set_pri_indev(Object *obj, const char *value, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     g_free(s->pri_indev); | ||||||
|  |     s->pri_indev = g_strdup(value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static char *compare_get_sec_indev(Object *obj, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     return g_strdup(s->sec_indev); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void compare_set_sec_indev(Object *obj, const char *value, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     g_free(s->sec_indev); | ||||||
|  |     s->sec_indev = g_strdup(value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static char *compare_get_outdev(Object *obj, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     return g_strdup(s->outdev); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void compare_set_outdev(Object *obj, const char *value, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     g_free(s->outdev); | ||||||
|  |     s->outdev = g_strdup(value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void compare_pri_rs_finalize(SocketReadState *pri_rs) | ||||||
|  | { | ||||||
|  |     CompareState *s = container_of(pri_rs, CompareState, pri_rs); | ||||||
|  | 
 | ||||||
|  |     if (packet_enqueue(s, PRIMARY_IN)) { | ||||||
|  |         trace_colo_compare_main("primary: unsupported packet in"); | ||||||
|  |         compare_chr_send(s->chr_out, pri_rs->buf, pri_rs->packet_len); | ||||||
|  |     } else { | ||||||
|  |         /* compare connection */ | ||||||
|  |         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void compare_sec_rs_finalize(SocketReadState *sec_rs) | ||||||
|  | { | ||||||
|  |     CompareState *s = container_of(sec_rs, CompareState, sec_rs); | ||||||
|  | 
 | ||||||
|  |     if (packet_enqueue(s, SECONDARY_IN)) { | ||||||
|  |         trace_colo_compare_main("secondary: unsupported packet in"); | ||||||
|  |     } else { | ||||||
|  |         /* compare connection */ | ||||||
|  |         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int compare_chardev_opts(void *opaque, | ||||||
|  |                                 const char *name, const char *value, | ||||||
|  |                                 Error **errp) | ||||||
|  | { | ||||||
|  |     CompareChardevProps *props = opaque; | ||||||
|  | 
 | ||||||
|  |     if (strcmp(name, "backend") == 0 && | ||||||
|  |         strcmp(value, "socket") == 0) { | ||||||
|  |         props->is_socket = true; | ||||||
|  |         return 0; | ||||||
|  |     } else if (strcmp(name, "host") == 0 || | ||||||
|  |               (strcmp(name, "port") == 0) || | ||||||
|  |               (strcmp(name, "server") == 0) || | ||||||
|  |               (strcmp(name, "wait") == 0) || | ||||||
|  |               (strcmp(name, "path") == 0)) { | ||||||
|  |         return 0; | ||||||
|  |     } else { | ||||||
|  |         error_setg(errp, | ||||||
|  |                    "COLO-compare does not support a chardev with option %s=%s", | ||||||
|  |                    name, value); | ||||||
|  |         return -1; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Return 0 is success. | ||||||
|  |  * Return 1 is failed. | ||||||
|  |  */ | ||||||
|  | static int find_and_check_chardev(CharDriverState **chr, | ||||||
|  |                                   char *chr_name, | ||||||
|  |                                   Error **errp) | ||||||
|  | { | ||||||
|  |     CompareChardevProps props; | ||||||
|  | 
 | ||||||
|  |     *chr = qemu_chr_find(chr_name); | ||||||
|  |     if (*chr == NULL) { | ||||||
|  |         error_setg(errp, "Device '%s' not found", | ||||||
|  |                    chr_name); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     memset(&props, 0, sizeof(props)); | ||||||
|  |     if (qemu_opt_foreach((*chr)->opts, compare_chardev_opts, &props, errp)) { | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (!props.is_socket) { | ||||||
|  |         error_setg(errp, "chardev \"%s\" is not a tcp socket", | ||||||
|  |                    chr_name); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Check old packet regularly so it can watch for any packets | ||||||
|  |  * that the secondary hasn't produced equivalents of. | ||||||
|  |  */ | ||||||
|  | static void check_old_packet_regular(void *opaque) | ||||||
|  | { | ||||||
|  |     CompareState *s = opaque; | ||||||
|  | 
 | ||||||
|  |     timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + | ||||||
|  |               REGULAR_PACKET_CHECK_MS); | ||||||
|  |     /* if have old packet we will notify checkpoint */ | ||||||
|  |     /*
 | ||||||
|  |      * TODO: Make timer handler run in compare thread | ||||||
|  |      * like qemu_chr_add_handlers_full. | ||||||
|  |      */ | ||||||
|  |     qemu_mutex_lock(&s->timer_check_lock); | ||||||
|  |     colo_old_packet_check(s); | ||||||
|  |     qemu_mutex_unlock(&s->timer_check_lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Called from the main thread on the primary | ||||||
|  |  * to setup colo-compare. | ||||||
|  |  */ | ||||||
|  | static void colo_compare_complete(UserCreatable *uc, Error **errp) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(uc); | ||||||
|  |     char thread_name[64]; | ||||||
|  |     static int compare_id; | ||||||
|  | 
 | ||||||
|  |     if (!s->pri_indev || !s->sec_indev || !s->outdev) { | ||||||
|  |         error_setg(errp, "colo compare needs 'primary_in' ," | ||||||
|  |                    "'secondary_in','outdev' property set"); | ||||||
|  |         return; | ||||||
|  |     } else if (!strcmp(s->pri_indev, s->outdev) || | ||||||
|  |                !strcmp(s->sec_indev, s->outdev) || | ||||||
|  |                !strcmp(s->pri_indev, s->sec_indev)) { | ||||||
|  |         error_setg(errp, "'indev' and 'outdev' could not be same " | ||||||
|  |                    "for compare module"); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (find_and_check_chardev(&s->chr_pri_in, s->pri_indev, errp)) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (find_and_check_chardev(&s->chr_sec_in, s->sec_indev, errp)) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (find_and_check_chardev(&s->chr_out, s->outdev, errp)) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     qemu_chr_fe_claim_no_fail(s->chr_pri_in); | ||||||
|  | 
 | ||||||
|  |     qemu_chr_fe_claim_no_fail(s->chr_sec_in); | ||||||
|  | 
 | ||||||
|  |     qemu_chr_fe_claim_no_fail(s->chr_out); | ||||||
|  | 
 | ||||||
|  |     net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize); | ||||||
|  |     net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize); | ||||||
|  | 
 | ||||||
|  |     g_queue_init(&s->conn_list); | ||||||
|  |     qemu_mutex_init(&s->timer_check_lock); | ||||||
|  | 
 | ||||||
|  |     s->connection_track_table = g_hash_table_new_full(connection_key_hash, | ||||||
|  |                                                       connection_key_equal, | ||||||
|  |                                                       g_free, | ||||||
|  |                                                       connection_destroy); | ||||||
|  | 
 | ||||||
|  |     sprintf(thread_name, "colo-compare %d", compare_id); | ||||||
|  |     qemu_thread_create(&s->thread, thread_name, | ||||||
|  |                        colo_compare_thread, s, | ||||||
|  |                        QEMU_THREAD_JOINABLE); | ||||||
|  |     compare_id++; | ||||||
|  | 
 | ||||||
|  |     /* A regular timer to kick any packets that the secondary doesn't match */ | ||||||
|  |     s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest runs */ | ||||||
|  |                             check_old_packet_regular, s); | ||||||
|  |     timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + | ||||||
|  |                         REGULAR_PACKET_CHECK_MS); | ||||||
|  | 
 | ||||||
|  |     return; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_compare_class_init(ObjectClass *oc, void *data) | ||||||
|  | { | ||||||
|  |     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); | ||||||
|  | 
 | ||||||
|  |     ucc->complete = colo_compare_complete; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_compare_init(Object *obj) | ||||||
|  | { | ||||||
|  |     object_property_add_str(obj, "primary_in", | ||||||
|  |                             compare_get_pri_indev, compare_set_pri_indev, | ||||||
|  |                             NULL); | ||||||
|  |     object_property_add_str(obj, "secondary_in", | ||||||
|  |                             compare_get_sec_indev, compare_set_sec_indev, | ||||||
|  |                             NULL); | ||||||
|  |     object_property_add_str(obj, "outdev", | ||||||
|  |                             compare_get_outdev, compare_set_outdev, | ||||||
|  |                             NULL); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_compare_finalize(Object *obj) | ||||||
|  | { | ||||||
|  |     CompareState *s = COLO_COMPARE(obj); | ||||||
|  | 
 | ||||||
|  |     if (s->chr_pri_in) { | ||||||
|  |         qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL); | ||||||
|  |         qemu_chr_fe_release(s->chr_pri_in); | ||||||
|  |     } | ||||||
|  |     if (s->chr_sec_in) { | ||||||
|  |         qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL); | ||||||
|  |         qemu_chr_fe_release(s->chr_sec_in); | ||||||
|  |     } | ||||||
|  |     if (s->chr_out) { | ||||||
|  |         qemu_chr_fe_release(s->chr_out); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     g_queue_free(&s->conn_list); | ||||||
|  | 
 | ||||||
|  |     if (qemu_thread_is_self(&s->thread)) { | ||||||
|  |         /* compare connection */ | ||||||
|  |         g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||||||
|  |         qemu_thread_join(&s->thread); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (s->timer) { | ||||||
|  |         timer_del(s->timer); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     qemu_mutex_destroy(&s->timer_check_lock); | ||||||
|  | 
 | ||||||
|  |     g_free(s->pri_indev); | ||||||
|  |     g_free(s->sec_indev); | ||||||
|  |     g_free(s->outdev); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static const TypeInfo colo_compare_info = { | ||||||
|  |     .name = TYPE_COLO_COMPARE, | ||||||
|  |     .parent = TYPE_OBJECT, | ||||||
|  |     .instance_size = sizeof(CompareState), | ||||||
|  |     .instance_init = colo_compare_init, | ||||||
|  |     .instance_finalize = colo_compare_finalize, | ||||||
|  |     .class_size = sizeof(CompareClass), | ||||||
|  |     .class_init = colo_compare_class_init, | ||||||
|  |     .interfaces = (InterfaceInfo[]) { | ||||||
|  |         { TYPE_USER_CREATABLE }, | ||||||
|  |         { } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void register_types(void) | ||||||
|  | { | ||||||
|  |     type_register_static(&colo_compare_info); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | type_init(register_types); | ||||||
							
								
								
									
										211
									
								
								net/colo.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								net/colo.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,211 @@ | |||||||
|  | /*
 | ||||||
|  |  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||||
|  |  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||||
|  |  * Copyright (c) 2016 FUJITSU LIMITED | ||||||
|  |  * Copyright (c) 2016 Intel Corporation | ||||||
|  |  * | ||||||
|  |  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||||
|  |  * | ||||||
|  |  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||||
|  |  * later.  See the COPYING file in the top-level directory. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "qemu/osdep.h" | ||||||
|  | #include "trace.h" | ||||||
|  | #include "net/colo.h" | ||||||
|  | 
 | ||||||
|  | uint32_t connection_key_hash(const void *opaque) | ||||||
|  | { | ||||||
|  |     const ConnectionKey *key = opaque; | ||||||
|  |     uint32_t a, b, c; | ||||||
|  | 
 | ||||||
|  |     /* Jenkins hash */ | ||||||
|  |     a = b = c = JHASH_INITVAL + sizeof(*key); | ||||||
|  |     a += key->src.s_addr; | ||||||
|  |     b += key->dst.s_addr; | ||||||
|  |     c += (key->src_port | key->dst_port << 16); | ||||||
|  |     __jhash_mix(a, b, c); | ||||||
|  | 
 | ||||||
|  |     a += key->ip_proto; | ||||||
|  |     __jhash_final(a, b, c); | ||||||
|  | 
 | ||||||
|  |     return c; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int connection_key_equal(const void *key1, const void *key2) | ||||||
|  | { | ||||||
|  |     return memcmp(key1, key2, sizeof(ConnectionKey)) == 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | int parse_packet_early(Packet *pkt) | ||||||
|  | { | ||||||
|  |     int network_length; | ||||||
|  |     static const uint8_t vlan[] = {0x81, 0x00}; | ||||||
|  |     uint8_t *data = pkt->data; | ||||||
|  |     uint16_t l3_proto; | ||||||
|  |     ssize_t l2hdr_len = eth_get_l2_hdr_length(data); | ||||||
|  | 
 | ||||||
|  |     if (pkt->size < ETH_HLEN) { | ||||||
|  |         trace_colo_proxy_main("pkt->size < ETH_HLEN"); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /*
 | ||||||
|  |      * TODO: support vlan. | ||||||
|  |      */ | ||||||
|  |     if (!memcmp(&data[12], vlan, sizeof(vlan))) { | ||||||
|  |         trace_colo_proxy_main("COLO-proxy don't support vlan"); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     pkt->network_header = data + l2hdr_len; | ||||||
|  | 
 | ||||||
|  |     const struct iovec l2vec = { | ||||||
|  |         .iov_base = (void *) data, | ||||||
|  |         .iov_len = l2hdr_len | ||||||
|  |     }; | ||||||
|  |     l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len); | ||||||
|  | 
 | ||||||
|  |     if (l3_proto != ETH_P_IP) { | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     network_length = pkt->ip->ip_hl * 4; | ||||||
|  |     if (pkt->size < l2hdr_len + network_length) { | ||||||
|  |         trace_colo_proxy_main("pkt->size < network_header + network_length"); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  |     pkt->transport_header = pkt->network_header + network_length; | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void fill_connection_key(Packet *pkt, ConnectionKey *key) | ||||||
|  | { | ||||||
|  |     uint32_t tmp_ports; | ||||||
|  | 
 | ||||||
|  |     memset(key, 0, sizeof(*key)); | ||||||
|  |     key->ip_proto = pkt->ip->ip_p; | ||||||
|  | 
 | ||||||
|  |     switch (key->ip_proto) { | ||||||
|  |     case IPPROTO_TCP: | ||||||
|  |     case IPPROTO_UDP: | ||||||
|  |     case IPPROTO_DCCP: | ||||||
|  |     case IPPROTO_ESP: | ||||||
|  |     case IPPROTO_SCTP: | ||||||
|  |     case IPPROTO_UDPLITE: | ||||||
|  |         tmp_ports = *(uint32_t *)(pkt->transport_header); | ||||||
|  |         key->src = pkt->ip->ip_src; | ||||||
|  |         key->dst = pkt->ip->ip_dst; | ||||||
|  |         key->src_port = ntohs(tmp_ports & 0xffff); | ||||||
|  |         key->dst_port = ntohs(tmp_ports >> 16); | ||||||
|  |         break; | ||||||
|  |     case IPPROTO_AH: | ||||||
|  |         tmp_ports = *(uint32_t *)(pkt->transport_header + 4); | ||||||
|  |         key->src = pkt->ip->ip_src; | ||||||
|  |         key->dst = pkt->ip->ip_dst; | ||||||
|  |         key->src_port = ntohs(tmp_ports & 0xffff); | ||||||
|  |         key->dst_port = ntohs(tmp_ports >> 16); | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void reverse_connection_key(ConnectionKey *key) | ||||||
|  | { | ||||||
|  |     struct in_addr tmp_ip; | ||||||
|  |     uint16_t tmp_port; | ||||||
|  | 
 | ||||||
|  |     tmp_ip = key->src; | ||||||
|  |     key->src = key->dst; | ||||||
|  |     key->dst = tmp_ip; | ||||||
|  | 
 | ||||||
|  |     tmp_port = key->src_port; | ||||||
|  |     key->src_port = key->dst_port; | ||||||
|  |     key->dst_port = tmp_port; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Connection *connection_new(ConnectionKey *key) | ||||||
|  | { | ||||||
|  |     Connection *conn = g_slice_new(Connection); | ||||||
|  | 
 | ||||||
|  |     conn->ip_proto = key->ip_proto; | ||||||
|  |     conn->processing = false; | ||||||
|  |     conn->offset = 0; | ||||||
|  |     conn->syn_flag = 0; | ||||||
|  |     g_queue_init(&conn->primary_list); | ||||||
|  |     g_queue_init(&conn->secondary_list); | ||||||
|  | 
 | ||||||
|  |     return conn; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void connection_destroy(void *opaque) | ||||||
|  | { | ||||||
|  |     Connection *conn = opaque; | ||||||
|  | 
 | ||||||
|  |     g_queue_foreach(&conn->primary_list, packet_destroy, NULL); | ||||||
|  |     g_queue_free(&conn->primary_list); | ||||||
|  |     g_queue_foreach(&conn->secondary_list, packet_destroy, NULL); | ||||||
|  |     g_queue_free(&conn->secondary_list); | ||||||
|  |     g_slice_free(Connection, conn); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Packet *packet_new(const void *data, int size) | ||||||
|  | { | ||||||
|  |     Packet *pkt = g_slice_new(Packet); | ||||||
|  | 
 | ||||||
|  |     pkt->data = g_memdup(data, size); | ||||||
|  |     pkt->size = size; | ||||||
|  |     pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); | ||||||
|  | 
 | ||||||
|  |     return pkt; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void packet_destroy(void *opaque, void *user_data) | ||||||
|  | { | ||||||
|  |     Packet *pkt = opaque; | ||||||
|  | 
 | ||||||
|  |     g_free(pkt->data); | ||||||
|  |     g_slice_free(Packet, pkt); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Clear hashtable, stop this hash growing really huge | ||||||
|  |  */ | ||||||
|  | void connection_hashtable_reset(GHashTable *connection_track_table) | ||||||
|  | { | ||||||
|  |     g_hash_table_remove_all(connection_track_table); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* if not found, create a new connection and add to hash table */ | ||||||
|  | Connection *connection_get(GHashTable *connection_track_table, | ||||||
|  |                            ConnectionKey *key, | ||||||
|  |                            GQueue *conn_list) | ||||||
|  | { | ||||||
|  |     Connection *conn = g_hash_table_lookup(connection_track_table, key); | ||||||
|  | 
 | ||||||
|  |     if (conn == NULL) { | ||||||
|  |         ConnectionKey *new_key = g_memdup(key, sizeof(*key)); | ||||||
|  | 
 | ||||||
|  |         conn = connection_new(key); | ||||||
|  | 
 | ||||||
|  |         if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) { | ||||||
|  |             trace_colo_proxy_main("colo proxy connection hashtable full," | ||||||
|  |                                   " clear it"); | ||||||
|  |             connection_hashtable_reset(connection_track_table); | ||||||
|  |             /*
 | ||||||
|  |              * clear the conn_list | ||||||
|  |              */ | ||||||
|  |             while (!g_queue_is_empty(conn_list)) { | ||||||
|  |                 connection_destroy(g_queue_pop_head(conn_list)); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         g_hash_table_insert(connection_track_table, new_key, conn); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return conn; | ||||||
|  | } | ||||||
							
								
								
									
										88
									
								
								net/colo.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								net/colo.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,88 @@ | |||||||
|  | /*
 | ||||||
|  |  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) | ||||||
|  |  * (a.k.a. Fault Tolerance or Continuous Replication) | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||||
|  |  * Copyright (c) 2016 FUJITSU LIMITED | ||||||
|  |  * Copyright (c) 2016 Intel Corporation | ||||||
|  |  * | ||||||
|  |  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||||
|  |  * | ||||||
|  |  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||||
|  |  * later.  See the COPYING file in the top-level directory. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #ifndef QEMU_COLO_PROXY_H | ||||||
|  | #define QEMU_COLO_PROXY_H | ||||||
|  | 
 | ||||||
|  | #include "slirp/slirp.h" | ||||||
|  | #include "qemu/jhash.h" | ||||||
|  | #include "qemu/timer.h" | ||||||
|  | 
 | ||||||
|  | #define HASHTABLE_MAX_SIZE 16384 | ||||||
|  | 
 | ||||||
|  | #ifndef IPPROTO_DCCP | ||||||
|  | #define IPPROTO_DCCP 33 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifndef IPPROTO_SCTP | ||||||
|  | #define IPPROTO_SCTP 132 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifndef IPPROTO_UDPLITE | ||||||
|  | #define IPPROTO_UDPLITE 136 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
/* One captured network packet plus the header positions found in it. */
typedef struct Packet {
    /* Private copy of the packet bytes (made by packet_new()) */
    void *data;
    /* Start of the network header inside data, also viewable as IP header */
    union {
        uint8_t *network_header;
        struct ip *ip;
    };
    /* Start of the transport header inside data */
    uint8_t *transport_header;
    /* Number of bytes in data */
    int size;
    /* Time of packet creation, in wall clock ms */
    int64_t creation_ms;
} Packet;
|  | 
 | ||||||
|  | typedef struct ConnectionKey { | ||||||
|  |     /* (src, dst) must be grouped, in the same way than in IP header */ | ||||||
|  |     struct in_addr src; | ||||||
|  |     struct in_addr dst; | ||||||
|  |     uint16_t src_port; | ||||||
|  |     uint16_t dst_port; | ||||||
|  |     uint8_t ip_proto; | ||||||
|  | } QEMU_PACKED ConnectionKey; | ||||||
|  | 
 | ||||||
|  | typedef struct Connection { | ||||||
|  |     /* connection primary send queue: element type: Packet */ | ||||||
|  |     GQueue primary_list; | ||||||
|  |     /* connection secondary send queue: element type: Packet */ | ||||||
|  |     GQueue secondary_list; | ||||||
|  |     /* flag to enqueue unprocessed_connections */ | ||||||
|  |     bool processing; | ||||||
|  |     uint8_t ip_proto; | ||||||
|  |     /* offset = secondary_seq - primary_seq */ | ||||||
|  |     tcp_seq  offset; | ||||||
|  |     /*
 | ||||||
|  |      * we use this flag update offset func | ||||||
|  |      * run once in independent tcp connection | ||||||
|  |      */ | ||||||
|  |     int syn_flag; | ||||||
|  | } Connection; | ||||||
|  | 
 | ||||||
|  | uint32_t connection_key_hash(const void *opaque); | ||||||
|  | int connection_key_equal(const void *opaque1, const void *opaque2); | ||||||
|  | int parse_packet_early(Packet *pkt); | ||||||
|  | void fill_connection_key(Packet *pkt, ConnectionKey *key); | ||||||
|  | void reverse_connection_key(ConnectionKey *key); | ||||||
|  | Connection *connection_new(ConnectionKey *key); | ||||||
|  | void connection_destroy(void *opaque); | ||||||
|  | Connection *connection_get(GHashTable *connection_track_table, | ||||||
|  |                            ConnectionKey *key, | ||||||
|  |                            GQueue *conn_list); | ||||||
|  | void connection_hashtable_reset(GHashTable *connection_track_table); | ||||||
|  | Packet *packet_new(const void *data, int size); | ||||||
|  | void packet_destroy(void *opaque, void *user_data); | ||||||
|  | 
 | ||||||
|  | #endif /* QEMU_COLO_PROXY_H */ | ||||||
							
								
								
									
										263
									
								
								net/filter-rewriter.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										263
									
								
								net/filter-rewriter.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,263 @@ | |||||||
|  | /*
 | ||||||
|  |  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. | ||||||
|  |  * Copyright (c) 2016 FUJITSU LIMITED | ||||||
|  |  * Copyright (c) 2016 Intel Corporation | ||||||
|  |  * | ||||||
|  |  * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||||||
|  |  * | ||||||
|  |  * This work is licensed under the terms of the GNU GPL, version 2 or | ||||||
|  |  * later.  See the COPYING file in the top-level directory. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "qemu/osdep.h" | ||||||
|  | #include "trace.h" | ||||||
|  | #include "net/colo.h" | ||||||
|  | #include "net/filter.h" | ||||||
|  | #include "net/net.h" | ||||||
|  | #include "qemu-common.h" | ||||||
|  | #include "qapi/error.h" | ||||||
|  | #include "qapi/qmp/qerror.h" | ||||||
|  | #include "qapi-visit.h" | ||||||
|  | #include "qom/object.h" | ||||||
|  | #include "qemu/main-loop.h" | ||||||
|  | #include "qemu/iov.h" | ||||||
|  | #include "net/checksum.h" | ||||||
|  | 
 | ||||||
/* QOM type name of the rewriter filter */
#define TYPE_FILTER_REWRITER "filter-rewriter"

/* Checked cast from any object pointer to RewriterState */
#define FILTER_COLO_REWRITER(obj) \
    OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
|  | 
 | ||||||
|  | typedef struct RewriterState { | ||||||
|  |     NetFilterState parent_obj; | ||||||
|  |     NetQueue *incoming_queue; | ||||||
|  |     /* hashtable to save connection */ | ||||||
|  |     GHashTable *connection_track_table; | ||||||
|  | } RewriterState; | ||||||
|  | 
 | ||||||
|  | static void filter_rewriter_flush(NetFilterState *nf) | ||||||
|  | { | ||||||
|  |     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||||
|  | 
 | ||||||
|  |     if (!qemu_net_queue_flush(s->incoming_queue)) { | ||||||
|  |         /* Unable to empty the queue, purge remaining packets */ | ||||||
|  |         qemu_net_queue_purge(s->incoming_queue, nf->netdev); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Return 1 on success, if return 0 means the pkt | ||||||
|  |  * is not TCP packet | ||||||
|  |  */ | ||||||
|  | static int is_tcp_packet(Packet *pkt) | ||||||
|  | { | ||||||
|  |     if (!parse_packet_early(pkt) && | ||||||
|  |         pkt->ip->ip_p == IPPROTO_TCP) { | ||||||
|  |         return 1; | ||||||
|  |     } else { | ||||||
|  |         return 0; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* handle tcp packet from primary guest */ | ||||||
|  | static int handle_primary_tcp_pkt(NetFilterState *nf, | ||||||
|  |                                   Connection *conn, | ||||||
|  |                                   Packet *pkt) | ||||||
|  | { | ||||||
|  |     struct tcphdr *tcp_pkt; | ||||||
|  | 
 | ||||||
|  |     tcp_pkt = (struct tcphdr *)pkt->transport_header; | ||||||
|  |     if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { | ||||||
|  |         char *sdebug, *ddebug; | ||||||
|  |         sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); | ||||||
|  |         ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); | ||||||
|  |         trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, | ||||||
|  |                     ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), | ||||||
|  |                     tcp_pkt->th_flags); | ||||||
|  |         trace_colo_filter_rewriter_conn_offset(conn->offset); | ||||||
|  |         g_free(sdebug); | ||||||
|  |         g_free(ddebug); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { | ||||||
|  |         /*
 | ||||||
|  |          * we use this flag update offset func | ||||||
|  |          * run once in independent tcp connection | ||||||
|  |          */ | ||||||
|  |         conn->syn_flag = 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) { | ||||||
|  |         if (conn->syn_flag) { | ||||||
|  |             /*
 | ||||||
|  |              * offset = secondary_seq - primary seq | ||||||
|  |              * ack packet sent by guest from primary node, | ||||||
|  |              * so we use th_ack - 1 get primary_seq | ||||||
|  |              */ | ||||||
|  |             conn->offset -= (ntohl(tcp_pkt->th_ack) - 1); | ||||||
|  |             conn->syn_flag = 0; | ||||||
|  |         } | ||||||
|  |         /* handle packets to the secondary from the primary */ | ||||||
|  |         tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset); | ||||||
|  | 
 | ||||||
|  |         net_checksum_calculate((uint8_t *)pkt->data, pkt->size); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* handle tcp packet from secondary guest */ | ||||||
|  | static int handle_secondary_tcp_pkt(NetFilterState *nf, | ||||||
|  |                                     Connection *conn, | ||||||
|  |                                     Packet *pkt) | ||||||
|  | { | ||||||
|  |     struct tcphdr *tcp_pkt; | ||||||
|  | 
 | ||||||
|  |     tcp_pkt = (struct tcphdr *)pkt->transport_header; | ||||||
|  | 
 | ||||||
|  |     if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { | ||||||
|  |         char *sdebug, *ddebug; | ||||||
|  |         sdebug = strdup(inet_ntoa(pkt->ip->ip_src)); | ||||||
|  |         ddebug = strdup(inet_ntoa(pkt->ip->ip_dst)); | ||||||
|  |         trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug, | ||||||
|  |                     ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), | ||||||
|  |                     tcp_pkt->th_flags); | ||||||
|  |         trace_colo_filter_rewriter_conn_offset(conn->offset); | ||||||
|  |         g_free(sdebug); | ||||||
|  |         g_free(ddebug); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { | ||||||
|  |         /*
 | ||||||
|  |          * save offset = secondary_seq and then | ||||||
|  |          * in handle_primary_tcp_pkt make offset | ||||||
|  |          * = secondary_seq - primary_seq | ||||||
|  |          */ | ||||||
|  |         conn->offset = ntohl(tcp_pkt->th_seq); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) { | ||||||
|  |         /* handle packets to the primary from the secondary*/ | ||||||
|  |         tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset); | ||||||
|  | 
 | ||||||
|  |         net_checksum_calculate((uint8_t *)pkt->data, pkt->size); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, | ||||||
|  |                                          NetClientState *sender, | ||||||
|  |                                          unsigned flags, | ||||||
|  |                                          const struct iovec *iov, | ||||||
|  |                                          int iovcnt, | ||||||
|  |                                          NetPacketSent *sent_cb) | ||||||
|  | { | ||||||
|  |     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||||
|  |     Connection *conn; | ||||||
|  |     ConnectionKey key; | ||||||
|  |     Packet *pkt; | ||||||
|  |     ssize_t size = iov_size(iov, iovcnt); | ||||||
|  |     char *buf = g_malloc0(size); | ||||||
|  | 
 | ||||||
|  |     iov_to_buf(iov, iovcnt, 0, buf, size); | ||||||
|  |     pkt = packet_new(buf, size); | ||||||
|  | 
 | ||||||
|  |     /*
 | ||||||
|  |      * if we get tcp packet | ||||||
|  |      * we will rewrite it to make secondary guest's | ||||||
|  |      * connection established successfully | ||||||
|  |      */ | ||||||
|  |     if (pkt && is_tcp_packet(pkt)) { | ||||||
|  | 
 | ||||||
|  |         fill_connection_key(pkt, &key); | ||||||
|  | 
 | ||||||
|  |         if (sender == nf->netdev) { | ||||||
|  |             /*
 | ||||||
|  |              * We need make tcp TX and RX packet | ||||||
|  |              * into one connection. | ||||||
|  |              */ | ||||||
|  |             reverse_connection_key(&key); | ||||||
|  |         } | ||||||
|  |         conn = connection_get(s->connection_track_table, | ||||||
|  |                               &key, | ||||||
|  |                               NULL); | ||||||
|  | 
 | ||||||
|  |         if (sender == nf->netdev) { | ||||||
|  |             /* NET_FILTER_DIRECTION_TX */ | ||||||
|  |             if (!handle_primary_tcp_pkt(nf, conn, pkt)) { | ||||||
|  |                 qemu_net_queue_send(s->incoming_queue, sender, 0, | ||||||
|  |                 (const uint8_t *)pkt->data, pkt->size, NULL); | ||||||
|  |                 packet_destroy(pkt, NULL); | ||||||
|  |                 pkt = NULL; | ||||||
|  |                 /*
 | ||||||
|  |                  * We block the packet here,after rewrite pkt | ||||||
|  |                  * and will send it | ||||||
|  |                  */ | ||||||
|  |                 return 1; | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             /* NET_FILTER_DIRECTION_RX */ | ||||||
|  |             if (!handle_secondary_tcp_pkt(nf, conn, pkt)) { | ||||||
|  |                 qemu_net_queue_send(s->incoming_queue, sender, 0, | ||||||
|  |                 (const uint8_t *)pkt->data, pkt->size, NULL); | ||||||
|  |                 packet_destroy(pkt, NULL); | ||||||
|  |                 pkt = NULL; | ||||||
|  |                 /*
 | ||||||
|  |                  * We block the packet here,after rewrite pkt | ||||||
|  |                  * and will send it | ||||||
|  |                  */ | ||||||
|  |                 return 1; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     packet_destroy(pkt, NULL); | ||||||
|  |     pkt = NULL; | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_rewriter_cleanup(NetFilterState *nf) | ||||||
|  | { | ||||||
|  |     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||||
|  | 
 | ||||||
|  |     /* flush packets */ | ||||||
|  |     if (s->incoming_queue) { | ||||||
|  |         filter_rewriter_flush(nf); | ||||||
|  |         g_free(s->incoming_queue); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_rewriter_setup(NetFilterState *nf, Error **errp) | ||||||
|  | { | ||||||
|  |     RewriterState *s = FILTER_COLO_REWRITER(nf); | ||||||
|  | 
 | ||||||
|  |     s->connection_track_table = g_hash_table_new_full(connection_key_hash, | ||||||
|  |                                                       connection_key_equal, | ||||||
|  |                                                       g_free, | ||||||
|  |                                                       connection_destroy); | ||||||
|  |     s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void colo_rewriter_class_init(ObjectClass *oc, void *data) | ||||||
|  | { | ||||||
|  |     NetFilterClass *nfc = NETFILTER_CLASS(oc); | ||||||
|  | 
 | ||||||
|  |     nfc->setup = colo_rewriter_setup; | ||||||
|  |     nfc->cleanup = colo_rewriter_cleanup; | ||||||
|  |     nfc->receive_iov = colo_rewriter_receive_iov; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static const TypeInfo colo_rewriter_info = { | ||||||
|  |     .name = TYPE_FILTER_REWRITER, | ||||||
|  |     .parent = TYPE_NETFILTER, | ||||||
|  |     .class_init = colo_rewriter_class_init, | ||||||
|  |     .instance_size = sizeof(RewriterState), | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void register_types(void) | ||||||
|  | { | ||||||
|  |     type_register_static(&colo_rewriter_info); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | type_init(register_types); | ||||||
| @ -690,9 +690,13 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, | |||||||
|         buffer = iov[0].iov_base; |         buffer = iov[0].iov_base; | ||||||
|         offset = iov[0].iov_len; |         offset = iov[0].iov_len; | ||||||
|     } else { |     } else { | ||||||
|         buf = g_new(uint8_t, NET_BUFSIZE); |         offset = iov_size(iov, iovcnt); | ||||||
|  |         if (offset > NET_BUFSIZE) { | ||||||
|  |             return -1; | ||||||
|  |         } | ||||||
|  |         buf = g_malloc(offset); | ||||||
|         buffer = buf; |         buffer = buf; | ||||||
|         offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE); |         offset = iov_to_buf(iov, iovcnt, 0, buf, offset); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { |     if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { | ||||||
| @ -1179,6 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict) | |||||||
| 
 | 
 | ||||||
|     qemu_del_net_client(nc->peer); |     qemu_del_net_client(nc->peer); | ||||||
|     qemu_del_net_client(nc); |     qemu_del_net_client(nc); | ||||||
|  |     qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void netdev_add(QemuOpts *opts, Error **errp) | void netdev_add(QemuOpts *opts, Error **errp) | ||||||
|  | |||||||
| @ -857,7 +857,9 @@ free_fail: | |||||||
|             return -1; |             return -1; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE, |         fd = net_bridge_run_helper(tap->helper, | ||||||
|  |                                    tap->has_br ? | ||||||
|  |                                    tap->br : DEFAULT_BRIDGE_INTERFACE, | ||||||
|                                    errp); |                                    errp); | ||||||
|         if (fd == -1) { |         if (fd == -1) { | ||||||
|             return -1; |             return -1; | ||||||
|  | |||||||
| @ -2636,6 +2636,8 @@ | |||||||
| # | # | ||||||
| # @downscript: #optional script to shut down the interface | # @downscript: #optional script to shut down the interface | ||||||
| # | # | ||||||
|  | # @br: #optional bridge name (since 2.8) | ||||||
|  | # | ||||||
| # @helper: #optional command to execute to configure bridge | # @helper: #optional command to execute to configure bridge | ||||||
| # | # | ||||||
| # @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes. | # @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes. | ||||||
| @ -2665,6 +2667,7 @@ | |||||||
|     '*fds':        'str', |     '*fds':        'str', | ||||||
|     '*script':     'str', |     '*script':     'str', | ||||||
|     '*downscript': 'str', |     '*downscript': 'str', | ||||||
|  |     '*br':         'str', | ||||||
|     '*helper':     'str', |     '*helper':     'str', | ||||||
|     '*sndbuf':     'size', |     '*sndbuf':     'size', | ||||||
|     '*vnet_hdr':   'bool', |     '*vnet_hdr':   'bool', | ||||||
|  | |||||||
							
								
								
									
										71
									
								
								qemu-char.c
									
									
									
									
									
								
							
							
						
						
									
										71
									
								
								qemu-char.c
									
									
									
									
									
								
							| @ -449,11 +449,12 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) | |||||||
| 
 | 
 | ||||||
| static void remove_fd_in_watch(CharDriverState *chr); | static void remove_fd_in_watch(CharDriverState *chr); | ||||||
| 
 | 
 | ||||||
| void qemu_chr_add_handlers(CharDriverState *s, | void qemu_chr_add_handlers_full(CharDriverState *s, | ||||||
|                                 IOCanReadHandler *fd_can_read, |                                 IOCanReadHandler *fd_can_read, | ||||||
|                                 IOReadHandler *fd_read, |                                 IOReadHandler *fd_read, | ||||||
|                                 IOEventHandler *fd_event, |                                 IOEventHandler *fd_event, | ||||||
|                            void *opaque) |                                 void *opaque, | ||||||
|  |                                 GMainContext *context) | ||||||
| { | { | ||||||
|     int fe_open; |     int fe_open; | ||||||
| 
 | 
 | ||||||
| @ -467,8 +468,9 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||||||
|     s->chr_read = fd_read; |     s->chr_read = fd_read; | ||||||
|     s->chr_event = fd_event; |     s->chr_event = fd_event; | ||||||
|     s->handler_opaque = opaque; |     s->handler_opaque = opaque; | ||||||
|     if (fe_open && s->chr_update_read_handler) |     if (fe_open && s->chr_update_read_handler) { | ||||||
|         s->chr_update_read_handler(s); |         s->chr_update_read_handler(s, context); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     if (!s->explicit_fe_open) { |     if (!s->explicit_fe_open) { | ||||||
|         qemu_chr_fe_set_open(s, fe_open); |         qemu_chr_fe_set_open(s, fe_open); | ||||||
| @ -481,6 +483,16 @@ void qemu_chr_add_handlers(CharDriverState *s, | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void qemu_chr_add_handlers(CharDriverState *s, | ||||||
|  |                            IOCanReadHandler *fd_can_read, | ||||||
|  |                            IOReadHandler *fd_read, | ||||||
|  |                            IOEventHandler *fd_event, | ||||||
|  |                            void *opaque) | ||||||
|  | { | ||||||
|  |     qemu_chr_add_handlers_full(s, fd_can_read, fd_read, | ||||||
|  |                                fd_event, opaque, NULL); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len) | static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len) | ||||||
| { | { | ||||||
|     return len; |     return len; | ||||||
| @ -722,7 +734,8 @@ static void mux_chr_event(void *opaque, int event) | |||||||
|         mux_chr_send_event(d, i, event); |         mux_chr_send_event(d, i, event); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void mux_chr_update_read_handler(CharDriverState *chr) | static void mux_chr_update_read_handler(CharDriverState *chr, | ||||||
|  |                                         GMainContext *context) | ||||||
| { | { | ||||||
|     MuxDriver *d = chr->opaque; |     MuxDriver *d = chr->opaque; | ||||||
| 
 | 
 | ||||||
| @ -736,8 +749,10 @@ static void mux_chr_update_read_handler(CharDriverState *chr) | |||||||
|     d->chr_event[d->mux_cnt] = chr->chr_event; |     d->chr_event[d->mux_cnt] = chr->chr_event; | ||||||
|     /* Fix up the real driver with mux routines */ |     /* Fix up the real driver with mux routines */ | ||||||
|     if (d->mux_cnt == 0) { |     if (d->mux_cnt == 0) { | ||||||
|         qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read, |         qemu_chr_add_handlers_full(d->drv, mux_chr_can_read, | ||||||
|                               mux_chr_event, chr); |                                    mux_chr_read, | ||||||
|  |                                    mux_chr_event, | ||||||
|  |                                    chr, context); | ||||||
|     } |     } | ||||||
|     if (d->focus != -1) { |     if (d->focus != -1) { | ||||||
|         mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); |         mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); | ||||||
| @ -853,6 +868,7 @@ typedef struct IOWatchPoll | |||||||
|     IOCanReadHandler *fd_can_read; |     IOCanReadHandler *fd_can_read; | ||||||
|     GSourceFunc fd_read; |     GSourceFunc fd_read; | ||||||
|     void *opaque; |     void *opaque; | ||||||
|  |     GMainContext *context; | ||||||
| } IOWatchPoll; | } IOWatchPoll; | ||||||
| 
 | 
 | ||||||
| static IOWatchPoll *io_watch_poll_from_source(GSource *source) | static IOWatchPoll *io_watch_poll_from_source(GSource *source) | ||||||
| @ -860,7 +876,8 @@ static IOWatchPoll *io_watch_poll_from_source(GSource *source) | |||||||
|     return container_of(source, IOWatchPoll, parent); |     return container_of(source, IOWatchPoll, parent); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) | static gboolean io_watch_poll_prepare(GSource *source, | ||||||
|  |                                       gint *timeout_) | ||||||
| { | { | ||||||
|     IOWatchPoll *iwp = io_watch_poll_from_source(source); |     IOWatchPoll *iwp = io_watch_poll_from_source(source); | ||||||
|     bool now_active = iwp->fd_can_read(iwp->opaque) > 0; |     bool now_active = iwp->fd_can_read(iwp->opaque) > 0; | ||||||
| @ -873,7 +890,7 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) | |||||||
|         iwp->src = qio_channel_create_watch( |         iwp->src = qio_channel_create_watch( | ||||||
|             iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); |             iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); | ||||||
|         g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); |         g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); | ||||||
|         g_source_attach(iwp->src, NULL); |         g_source_attach(iwp->src, iwp->context); | ||||||
|     } else { |     } else { | ||||||
|         g_source_destroy(iwp->src); |         g_source_destroy(iwp->src); | ||||||
|         g_source_unref(iwp->src); |         g_source_unref(iwp->src); | ||||||
| @ -920,19 +937,22 @@ static GSourceFuncs io_watch_poll_funcs = { | |||||||
| static guint io_add_watch_poll(QIOChannel *ioc, | static guint io_add_watch_poll(QIOChannel *ioc, | ||||||
|                                IOCanReadHandler *fd_can_read, |                                IOCanReadHandler *fd_can_read, | ||||||
|                                QIOChannelFunc fd_read, |                                QIOChannelFunc fd_read, | ||||||
|                                gpointer user_data) |                                gpointer user_data, | ||||||
|  |                                GMainContext *context) | ||||||
| { | { | ||||||
|     IOWatchPoll *iwp; |     IOWatchPoll *iwp; | ||||||
|     int tag; |     int tag; | ||||||
| 
 | 
 | ||||||
|     iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll)); |     iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, | ||||||
|  |                                        sizeof(IOWatchPoll)); | ||||||
|     iwp->fd_can_read = fd_can_read; |     iwp->fd_can_read = fd_can_read; | ||||||
|     iwp->opaque = user_data; |     iwp->opaque = user_data; | ||||||
|     iwp->ioc = ioc; |     iwp->ioc = ioc; | ||||||
|     iwp->fd_read = (GSourceFunc) fd_read; |     iwp->fd_read = (GSourceFunc) fd_read; | ||||||
|     iwp->src = NULL; |     iwp->src = NULL; | ||||||
|  |     iwp->context = context; | ||||||
| 
 | 
 | ||||||
|     tag = g_source_attach(&iwp->parent, NULL); |     tag = g_source_attach(&iwp->parent, context); | ||||||
|     g_source_unref(&iwp->parent); |     g_source_unref(&iwp->parent); | ||||||
|     return tag; |     return tag; | ||||||
| } | } | ||||||
| @ -1064,7 +1084,8 @@ static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond) | |||||||
|     return qio_channel_create_watch(s->ioc_out, cond); |     return qio_channel_create_watch(s->ioc_out, cond); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void fd_chr_update_read_handler(CharDriverState *chr) | static void fd_chr_update_read_handler(CharDriverState *chr, | ||||||
|  |                                        GMainContext *context) | ||||||
| { | { | ||||||
|     FDCharDriver *s = chr->opaque; |     FDCharDriver *s = chr->opaque; | ||||||
| 
 | 
 | ||||||
| @ -1072,7 +1093,8 @@ static void fd_chr_update_read_handler(CharDriverState *chr) | |||||||
|     if (s->ioc_in) { |     if (s->ioc_in) { | ||||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc_in, |         chr->fd_in_tag = io_add_watch_poll(s->ioc_in, | ||||||
|                                            fd_chr_read_poll, |                                            fd_chr_read_poll, | ||||||
|                                            fd_chr_read, chr); |                                            fd_chr_read, chr, | ||||||
|  |                                            context); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1319,7 +1341,8 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) | |||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void pty_chr_update_read_handler(CharDriverState *chr) | static void pty_chr_update_read_handler(CharDriverState *chr, | ||||||
|  |                                         GMainContext *context) | ||||||
| { | { | ||||||
|     qemu_mutex_lock(&chr->chr_write_lock); |     qemu_mutex_lock(&chr->chr_write_lock); | ||||||
|     pty_chr_update_read_handler_locked(chr); |     pty_chr_update_read_handler_locked(chr); | ||||||
| @ -1423,7 +1446,8 @@ static void pty_chr_state(CharDriverState *chr, int connected) | |||||||
|         if (!chr->fd_in_tag) { |         if (!chr->fd_in_tag) { | ||||||
|             chr->fd_in_tag = io_add_watch_poll(s->ioc, |             chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||||
|                                                pty_chr_read_poll, |                                                pty_chr_read_poll, | ||||||
|                                                pty_chr_read, chr); |                                                pty_chr_read, | ||||||
|  |                                                chr, NULL); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @ -2565,7 +2589,8 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) | |||||||
|     return TRUE; |     return TRUE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void udp_chr_update_read_handler(CharDriverState *chr) | static void udp_chr_update_read_handler(CharDriverState *chr, | ||||||
|  |                                         GMainContext *context) | ||||||
| { | { | ||||||
|     NetCharDriver *s = chr->opaque; |     NetCharDriver *s = chr->opaque; | ||||||
| 
 | 
 | ||||||
| @ -2573,7 +2598,8 @@ static void udp_chr_update_read_handler(CharDriverState *chr) | |||||||
|     if (s->ioc) { |     if (s->ioc) { | ||||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, |         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||||
|                                            udp_chr_read_poll, |                                            udp_chr_read_poll, | ||||||
|                                            udp_chr_read, chr); |                                            udp_chr_read, chr, | ||||||
|  |                                            context); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -2976,12 +3002,14 @@ static void tcp_chr_connect(void *opaque) | |||||||
|     if (s->ioc) { |     if (s->ioc) { | ||||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, |         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||||
|                                            tcp_chr_read_poll, |                                            tcp_chr_read_poll, | ||||||
|                                            tcp_chr_read, chr); |                                            tcp_chr_read, | ||||||
|  |                                            chr, NULL); | ||||||
|     } |     } | ||||||
|     qemu_chr_be_generic_open(chr); |     qemu_chr_be_generic_open(chr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void tcp_chr_update_read_handler(CharDriverState *chr) | static void tcp_chr_update_read_handler(CharDriverState *chr, | ||||||
|  |                                         GMainContext *context) | ||||||
| { | { | ||||||
|     TCPCharDriver *s = chr->opaque; |     TCPCharDriver *s = chr->opaque; | ||||||
| 
 | 
 | ||||||
| @ -2993,7 +3021,8 @@ static void tcp_chr_update_read_handler(CharDriverState *chr) | |||||||
|     if (s->ioc) { |     if (s->ioc) { | ||||||
|         chr->fd_in_tag = io_add_watch_poll(s->ioc, |         chr->fd_in_tag = io_add_watch_poll(s->ioc, | ||||||
|                                            tcp_chr_read_poll, |                                            tcp_chr_read_poll, | ||||||
|                                            tcp_chr_read, chr); |                                            tcp_chr_read, chr, | ||||||
|  |                                            context); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1598,10 +1598,11 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, | |||||||
|     "                configure a host TAP network backend with ID 'str'\n" |     "                configure a host TAP network backend with ID 'str'\n" | ||||||
| #else | #else | ||||||
|     "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" |     "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" | ||||||
|     "         [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" |     "         [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" | ||||||
|     "         [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" |     "         [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" | ||||||
|     "         [,poll-us=n]\n" |     "         [,poll-us=n]\n" | ||||||
|     "                configure a host TAP network backend with ID 'str'\n" |     "                configure a host TAP network backend with ID 'str'\n" | ||||||
|  |     "                connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n" | ||||||
|     "                use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n" |     "                use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n" | ||||||
|     "                to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n" |     "                to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n" | ||||||
|     "                to deconfigure it\n" |     "                to deconfigure it\n" | ||||||
| @ -1888,8 +1889,8 @@ processed and applied to -net user. Mixing them with the new configuration | |||||||
| syntax gives undefined results. Their use for new applications is discouraged | syntax gives undefined results. Their use for new applications is discouraged | ||||||
| as they will be removed from future versions. | as they will be removed from future versions. | ||||||
| 
 | 
 | ||||||
| @item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] | @item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] | ||||||
| @itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] | @itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] | ||||||
| Connect the host TAP network interface @var{name} to VLAN @var{n}. | Connect the host TAP network interface @var{name} to VLAN @var{n}. | ||||||
| 
 | 
 | ||||||
| Use the network script @var{file} to configure it and the network script | Use the network script @var{file} to configure it and the network script | ||||||
| @ -1900,8 +1901,9 @@ automatically provides one. The default network configure script is | |||||||
| to disable script execution. | to disable script execution. | ||||||
| 
 | 
 | ||||||
| If running QEMU as an unprivileged user, use the network helper | If running QEMU as an unprivileged user, use the network helper | ||||||
| @var{helper} to configure the TAP interface. The default network | @var{helper} to configure the TAP interface and attach it to the bridge. | ||||||
| helper executable is @file{/path/to/qemu-bridge-helper}. | The default network helper executable is @file{/path/to/qemu-bridge-helper} | ||||||
|  | and the default bridge device is @file{br0}. | ||||||
| 
 | 
 | ||||||
| @option{fd}=@var{h} can be used to specify the handle of an already | @option{fd}=@var{h} can be used to specify the handle of an already | ||||||
| opened host TAP interface. | opened host TAP interface. | ||||||
| @ -3887,6 +3889,19 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not | |||||||
| be the same. we can just use indev or outdev, but at least one of indev or outdev | be the same. we can just use indev or outdev, but at least one of indev or outdev | ||||||
| need to be specified. | need to be specified. | ||||||
| 
 | 
 | ||||||
|  | @item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}] | ||||||
|  | 
 | ||||||
|  | Filter-rewriter is a part of the COLO project. It rewrites TCP packets sent | ||||||
|  | from the primary to the secondary to keep the secondary's TCP connection, | ||||||
|  | and rewrites TCP packets sent from the secondary to the primary so that | ||||||
|  | they can be handled by the client. | ||||||
|  | 
 | ||||||
|  | usage: | ||||||
|  | colo secondary: | ||||||
|  | -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||||
|  | -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||||
|  | -object filter-rewriter,id=rew0,netdev=hn0,queue=all | ||||||
|  | 
 | ||||||
| @item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}] | @item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}] | ||||||
| 
 | 
 | ||||||
| Dump the network traffic on netdev @var{dev} to the file specified by | Dump the network traffic on netdev @var{dev} to the file specified by | ||||||
| @ -3894,6 +3909,45 @@ Dump the network traffic on netdev @var{dev} to the file specified by | |||||||
| The file format is libpcap, so it can be analyzed with tools such as tcpdump | The file format is libpcap, so it can be analyzed with tools such as tcpdump | ||||||
| or Wireshark. | or Wireshark. | ||||||
| 
 | 
 | ||||||
|  | @item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid}, | ||||||
|  | outdev=@var{chardevid} | ||||||
|  | 
 | ||||||
|  | Colo-compare gets packets from primary_in@var{chardevid} and secondary_in@var{chardevid}, then compares the primary packet with | ||||||
|  | the secondary packet. If the packets are the same, the primary | ||||||
|  | packet is output to outdev@var{chardevid}; otherwise colo-frame is notified | ||||||
|  | to do a checkpoint and the primary packet is sent to outdev@var{chardevid}. | ||||||
|  | 
 | ||||||
|  | It must be used together with filter-mirror and filter-redirector. | ||||||
|  | 
 | ||||||
|  | @example | ||||||
|  | 
 | ||||||
|  | primary: | ||||||
|  | -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||||
|  | -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 | ||||||
|  | -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait | ||||||
|  | -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait | ||||||
|  | -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait | ||||||
|  | -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 | ||||||
|  | -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait | ||||||
|  | -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 | ||||||
|  | -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 | ||||||
|  | -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out | ||||||
|  | -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 | ||||||
|  | -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 | ||||||
|  | 
 | ||||||
|  | secondary: | ||||||
|  | -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | ||||||
|  | -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 | ||||||
|  | -chardev socket,id=red0,host=3.3.3.3,port=9003 | ||||||
|  | -chardev socket,id=red1,host=3.3.3.3,port=9004 | ||||||
|  | -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | ||||||
|  | -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | ||||||
|  | 
 | ||||||
|  | @end example | ||||||
|  | 
 | ||||||
|  | For details of the command lines above, see | ||||||
|  | the colo-compare git log. | ||||||
|  | 
 | ||||||
| @item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | @item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | ||||||
| @item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | @item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										16
									
								
								trace-events
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								trace-events
									
									
									
									
									
								
							| @ -139,6 +139,22 @@ memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t v | |||||||
| memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | ||||||
| memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" | ||||||
| 
 | 
 | ||||||
|  | # net/colo.c | ||||||
|  | colo_proxy_main(const char *chr) ": %s" | ||||||
|  | 
 | ||||||
|  | # net/colo-compare.c | ||||||
|  | colo_compare_main(const char *chr) ": %s" | ||||||
|  | colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" | ||||||
|  | colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" | ||||||
|  | colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" | ||||||
|  | colo_old_packet_check_found(int64_t old_time) "%" PRId64 | ||||||
|  | colo_compare_miscompare(void) "" | ||||||
|  | 
 | ||||||
|  | # net/filter-rewriter.c | ||||||
|  | colo_filter_rewriter_debug(void) "" | ||||||
|  | colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=%x\n" | ||||||
|  | colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n" | ||||||
|  | 
 | ||||||
| ### Guest events, keep at bottom | ### Guest events, keep at bottom | ||||||
| 
 | 
 | ||||||
| # @vaddr: Access' virtual address. | # @vaddr: Access' virtual address. | ||||||
|  | |||||||
							
								
								
									
										4
									
								
								vl.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								vl.c
									
									
									
									
									
								
							| @ -2845,7 +2845,9 @@ static bool object_create_initial(const char *type) | |||||||
|     if (g_str_equal(type, "filter-buffer") || |     if (g_str_equal(type, "filter-buffer") || | ||||||
|         g_str_equal(type, "filter-dump") || |         g_str_equal(type, "filter-dump") || | ||||||
|         g_str_equal(type, "filter-mirror") || |         g_str_equal(type, "filter-mirror") || | ||||||
|         g_str_equal(type, "filter-redirector")) { |         g_str_equal(type, "filter-redirector") || | ||||||
|  |         g_str_equal(type, "colo-compare") || | ||||||
|  |         g_str_equal(type, "filter-rewriter")) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Peter Maydell
						Peter Maydell