Файловый менеджер - Редактировать - /var/www/html/include.zip
Ðазад
PK ! KrZ�B B rdma/uverbs_types.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ #ifndef _UVERBS_TYPES_ #define _UVERBS_TYPES_ #include <linux/kernel.h> #include <rdma/ib_verbs.h> struct uverbs_obj_type; struct uverbs_api_object; enum rdma_lookup_mode { UVERBS_LOOKUP_READ, UVERBS_LOOKUP_WRITE, /* * Destroy is like LOOKUP_WRITE, except that the uobject is not * locked. uobj_destroy is used to convert a LOOKUP_DESTROY lock into * a LOOKUP_WRITE lock. */ UVERBS_LOOKUP_DESTROY, }; /* * The following sequences are valid: * Success flow: * alloc_begin * alloc_commit * [..] * Access flow: * lookup_get(exclusive=false) & uverbs_try_lock_object * lookup_put(exclusive=false) via rdma_lookup_put_uobject * Destruction flow: * lookup_get(exclusive=true) & uverbs_try_lock_object * remove_commit * remove_handle (optional) * lookup_put(exclusive=true) via rdma_lookup_put_uobject * * Allocate Error flow #1 * alloc_begin * alloc_abort * Allocate Error flow #2 * alloc_begin * remove_commit * alloc_abort * Allocate Error flow #3 * alloc_begin * alloc_commit (fails) * remove_commit * alloc_abort * * In all cases the caller must hold the ufile kref until alloc_commit or * alloc_abort returns. 
*/ struct uverbs_obj_type_class { struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs); /* This consumes the kref on uobj */ void (*alloc_commit)(struct ib_uobject *uobj); /* This does not consume the kref on uobj */ void (*alloc_abort)(struct ib_uobject *uobj); struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode); void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ int __must_check (*destroy_hw)(struct ib_uobject *uobj, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); void (*swap_uobjects)(struct ib_uobject *obj_old, struct ib_uobject *obj_new); }; struct uverbs_obj_type { const struct uverbs_obj_type_class * const type_class; size_t obj_size; }; /* * Objects type classes which support a detach state (object is still alive but * it's not attached to any context need to make sure: * (a) no call through to a driver after a detach is called * (b) detach isn't called concurrently with context_cleanup */ struct uverbs_obj_idr_type { /* * In idr based objects, uverbs_obj_type_class points to a generic * idr operations. In order to specialize the underlying types (e.g. CQ, * QPs, etc.), we add destroy_object specific callbacks. */ struct uverbs_obj_type type; /* Free driver resources from the uobject, make the driver uncallable, * and move the uobject to the detached state. If the object was * destroyed by the user's request, a failure should leave the uobject * completely unchanged. 
*/ int __must_check (*destroy_object)(struct ib_uobject *uobj, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, enum rdma_lookup_mode mode, struct uverbs_attr_bundle *attrs); void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on * an uobject. This is useful when a handler wants to keep the uobject's memory * alive, regardless if this uobject is still alive in the context's objects * repository. Objects are put via uverbs_uobject_put. */ static inline void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); } void uverbs_uobject_put(struct ib_uobject *uobject); struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic * fd operations. In order to specialize the underlying types (e.g. * completion_channel), we use fops, name and flags for fd creation. * destroy_object is called when the uobject is to be destroyed, * because the driver is removed or the FD is closed. 
*/ struct uverbs_obj_type type; void (*destroy_object)(struct ib_uobject *uobj, enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; }; extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) #define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \ ((&((const struct uverbs_obj_fd_type) \ {.type = { \ .type_class = &uverbs_fd_class, \ .obj_size = (_obj_size) + \ UVERBS_BUILD_BUG_ON((_obj_size) < \ sizeof(struct ib_uobject)), \ }, \ .destroy_object = _destroy_object, \ .fops = _fops, \ .name = _name, \ .flags = _flags}))->type) #define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \ ((&((const struct uverbs_obj_idr_type) \ {.type = { \ .type_class = &uverbs_idr_class, \ .obj_size = (_size) + \ UVERBS_BUILD_BUG_ON((_size) < \ sizeof(struct ib_uobject)) \ }, \ .destroy_object = _destroy_object,}))->type) #define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \ _destroy_object) #endif PK ! ��� � rdma/ib_addr.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. */ #ifndef IB_ADDR_H #define IB_ADDR_H #include <linux/ethtool.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/socket.h> #include <linux/if_vlan.h> #include <net/ipv6.h> #include <net/if_inet6.h> #include <net/ip.h> #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> #include <net/net_namespace.h> /** * struct rdma_dev_addr - Contains resolved RDMA hardware addresses * @src_dev_addr: Source MAC address. * @dst_dev_addr: Destination MAC address. 
* @broadcast: Broadcast address of the device. * @dev_type: The interface hardware type of the device. * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. * @sgid_attr: GID attribute to use for identified SGID */ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; struct net *net; const struct ib_gid_attr *sgid_attr; enum rdma_network_type network; int hoplimit; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. * * The dev_addr->net field must be initialized. */ int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. The net field of the addr struct must be valid. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from * rdma_dev_addr. * @context: User-specified context associated with the call. 
*/ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, unsigned long timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), bool resolve_by_gid_attr, void *context); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_addr_size(const struct sockaddr *addr); int rdma_addr_size_in6(struct sockaddr_in6 *addr); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { return is_vlan_dev(dev) ? 
vlan_dev_vlan_id(dev) : 0xffff; } static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) { switch (addr->sa_family) { case AF_INET: ipv6_addr_set_v4mapped(((struct sockaddr_in *) addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; case AF_INET6: *(struct in6_addr *)&gid->raw = ((struct sockaddr_in6 *)addr)->sin6_addr; break; default: return -EINVAL; } return 0; } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) { if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin_family = AF_INET; memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); } else { struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } } /* * rdma_get/set_sgid/dgid() APIs are applicable to IB, and iWarp. * They are not applicable to RoCE. * RoCE GIDs are derived from the IP addresses. */ static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof(*gid)); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * Reduce IB headers from effective IBoE MTU. 
*/ mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { return is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : NULL; } #endif /* IB_ADDR_H */ PK ! ���y~ ~ rdma/iw_cm.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 
*/ #ifndef IW_CM_H #define IW_CM_H #include <linux/in.h> #include <rdma/ib_cm.h> struct iw_cm_id; enum iw_cm_event_type { IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */ IW_CM_EVENT_CONNECT_REPLY, /* reply from active connect request */ IW_CM_EVENT_ESTABLISHED, /* passive side accept successful */ IW_CM_EVENT_DISCONNECT, /* orderly shutdown */ IW_CM_EVENT_CLOSE /* close complete */ }; struct iw_cm_event { enum iw_cm_event_type event; int status; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; void *private_data; void *provider_data; u8 private_data_len; u8 ord; u8 ird; }; /** * iw_cm_handler - Function to be called by the IW CM when delivering events * to the client. * * @cm_id: The IW CM identifier associated with the event. * @event: Pointer to the event structure. */ typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id, struct iw_cm_event *event); /** * iw_event_handler - Function called by the provider when delivering provider * events to the IW CM. Returns either 0 indicating the event was processed * or -errno if the event could not be processed. * * @cm_id: The IW CM identifier associated with the event. * @event: Pointer to the event structure. 
*/ typedef int (*iw_event_handler)(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; struct sockaddr_storage m_local_addr; /* nmapped local addr */ struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ /* Used by provider to add and remove refs on IW cm_id */ void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; bool tos_set:1; bool mapped:1; bool afonly:1; }; struct iw_cm_conn_param { const void *private_data; u16 private_data_len; u32 ord; u32 ird; u32 qpn; }; enum iw_flags { /* * This flag allows the iwcm and iwpmd to still advertise * mappings but the real and mapped port numbers are the * same. Further, iwpmd will not bind any user socket to * reserve the port. This is required for soft iwarp * to play in the port mapped iwarp space. */ IW_F_NO_PORT_MAP = (1 << 0), }; /** * iw_create_cm_id - Create an IW CM identifier. * * @device: The IB device on which to create the IW CM identier. * @event_handler: User callback invoked to report events associated with the * returned IW CM identifier. * @context: User specified context associated with the id. */ struct iw_cm_id *iw_create_cm_id(struct ib_device *device, iw_cm_handler cm_handler, void *context); /** * iw_destroy_cm_id - Destroy an IW CM identifier. * * @cm_id: The previously created IW CM identifier to destroy. * * The client can assume that no events will be delivered for the CM ID after * this function returns. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id); /** * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP * * @cm_id: The IW CM idenfier to unbind from the QP. 
* @qp: The QP * * This is called by the provider when destroying the QP to ensure * that any references held by the IWCM are released. It may also * be called by the IWCM when destroying a CM_ID to that any * references held by the provider are released. */ void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp); /** * iw_cm_get_qp - Return the ib_qp associated with a QPN * * @ib_device: The IB device * @qpn: The queue pair number */ struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn); /** * iw_cm_listen - Listen for incoming connection requests on the * specified IW CM id. * * @cm_id: The IW CM identifier. * @backlog: The maximum number of outstanding un-accepted inbound listen * requests to queue. * * The source address and port number are specified in the IW CM identifier * structure. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog); /** * iw_cm_accept - Called to accept an incoming connect request. * * @cm_id: The IW CM identifier associated with the connection request. * @iw_param: Pointer to a structure containing connection establishment * parameters. * * The specified cm_id will have been provided in the event data for a * CONNECT_REQUEST event. Subsequent events related to this connection will be * delivered to the specified IW CM identifier prior and may occur prior to * the return of this function. If this function returns a non-zero value, the * client can assume that no events will be delivered to the specified IW CM * identifier. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); /** * iw_cm_reject - Reject an incoming connection request. * * @cm_id: Connection identifier associated with the request. * @private_daa: Pointer to data to deliver to the remote peer as part of the * reject message. * @private_data_len: The number of bytes in the private_data parameter. * * The client can assume that no events will be delivered to the specified IW * CM identifier following the return of this function. 
The private_data * buffer is available for reuse when this function returns. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * iw_cm_connect - Called to request a connection to a remote peer. * * @cm_id: The IW CM identifier for the connection. * @iw_param: Pointer to a structure containing connection establishment * parameters. * * Events may be delivered to the specified IW CM identifier prior to the * return of this function. If this function returns a non-zero value, the * client can assume that no events will be delivered to the specified IW CM * identifier. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); /** * iw_cm_disconnect - Close the specified connection. * * @cm_id: The IW CM identifier to close. * @abrupt: If 0, the connection will be closed gracefully, otherwise, the * connection will be reset. * * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is * delivered. */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt); /** * iw_cm_init_qp_attr - Called to initialize the attributes of the QP * associated with a IW CM identifier. * * @cm_id: The IW CM identifier associated with the QP * @qp_attr: Pointer to the QP attributes structure. * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are * valid. */ int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); /** * iwcm_reject_msg - return a pointer to a reject message string. * @reason: Value returned in the REJECT event status field. */ const char *__attribute_const__ iwcm_reject_msg(int reason); #endif /* IW_CM_H */ PK ! =�)�a �a rdma/ib_mad.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. 
All rights reserved. * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. */ #ifndef IB_MAD_H #define IB_MAD_H #include <linux/list.h> #include <rdma/ib_verbs.h> #include <uapi/rdma/ib_user_mad.h> /* Management base versions */ #define IB_MGMT_BASE_VERSION 1 #define OPA_MGMT_BASE_VERSION 0x80 #define OPA_SM_CLASS_VERSION 0x80 /* Management classes */ #define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 #define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 #define IB_MGMT_CLASS_SUBN_ADM 0x03 #define IB_MGMT_CLASS_PERF_MGMT 0x04 #define IB_MGMT_CLASS_BM 0x05 #define IB_MGMT_CLASS_DEVICE_MGMT 0x06 #define IB_MGMT_CLASS_CM 0x07 #define IB_MGMT_CLASS_SNMP 0x08 #define IB_MGMT_CLASS_DEVICE_ADM 0x10 #define IB_MGMT_CLASS_BOOT_MGMT 0x11 #define IB_MGMT_CLASS_BIS 0x12 #define IB_MGMT_CLASS_CONG_MGMT 0x21 #define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 #define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F #define IB_OPENIB_OUI (0x001405) /* Management methods */ #define IB_MGMT_METHOD_GET 0x01 #define IB_MGMT_METHOD_SET 0x02 #define IB_MGMT_METHOD_GET_RESP 0x81 #define IB_MGMT_METHOD_SEND 0x03 #define IB_MGMT_METHOD_TRAP 0x05 #define IB_MGMT_METHOD_REPORT 0x06 #define IB_MGMT_METHOD_REPORT_RESP 0x86 #define IB_MGMT_METHOD_TRAP_REPRESS 0x07 #define IB_MGMT_METHOD_RESP 0x80 #define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) #define IB_MGMT_MAX_METHODS 128 /* MAD Status field bit masks */ #define IB_MGMT_MAD_STATUS_SUCCESS 0x0000 #define IB_MGMT_MAD_STATUS_BUSY 0x0001 #define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002 #define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004 #define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008 #define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c #define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c /* RMPP information */ #define IB_MGMT_RMPP_VERSION 1 #define IB_MGMT_RMPP_TYPE_DATA 1 #define IB_MGMT_RMPP_TYPE_ACK 2 #define IB_MGMT_RMPP_TYPE_STOP 3 #define IB_MGMT_RMPP_TYPE_ABORT 4 #define IB_MGMT_RMPP_FLAG_ACTIVE 1 #define IB_MGMT_RMPP_FLAG_FIRST (1<<1) #define 
IB_MGMT_RMPP_FLAG_LAST (1<<2) #define IB_MGMT_RMPP_NO_RESPTIME 0x1F #define IB_MGMT_RMPP_STATUS_SUCCESS 0 #define IB_MGMT_RMPP_STATUS_RESX 1 #define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 #define IB_MGMT_RMPP_STATUS_T2L 118 #define IB_MGMT_RMPP_STATUS_BAD_LEN 119 #define IB_MGMT_RMPP_STATUS_BAD_SEG 120 #define IB_MGMT_RMPP_STATUS_BADT 121 #define IB_MGMT_RMPP_STATUS_W2S 122 #define IB_MGMT_RMPP_STATUS_S2B 123 #define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 #define IB_MGMT_RMPP_STATUS_UNV 125 #define IB_MGMT_RMPP_STATUS_TMR 126 #define IB_MGMT_RMPP_STATUS_UNSPEC 127 #define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 #define IB_QP0 0 #define IB_QP1 cpu_to_be32(1) #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 #define IB_DEFAULT_PKEY_PARTIAL 0x7FFF #define IB_DEFAULT_PKEY_FULL 0xFFFF /* * Generic trap/notice types */ #define IB_NOTICE_TYPE_FATAL 0x80 #define IB_NOTICE_TYPE_URGENT 0x81 #define IB_NOTICE_TYPE_SECURITY 0x82 #define IB_NOTICE_TYPE_SM 0x83 #define IB_NOTICE_TYPE_INFO 0x84 /* * Generic trap/notice producers */ #define IB_NOTICE_PROD_CA cpu_to_be16(1) #define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) #define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) #define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) enum { IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, IB_MGMT_SA_HDR = 56, IB_MGMT_SA_DATA = 200, IB_MGMT_DEVICE_HDR = 64, IB_MGMT_DEVICE_DATA = 192, IB_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + IB_MGMT_MAD_DATA, OPA_MGMT_MAD_DATA = 2024, OPA_MGMT_RMPP_DATA = 2012, OPA_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + OPA_MGMT_MAD_DATA, }; struct ib_mad_hdr { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; __be16 class_specific; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; }; struct ib_rmpp_hdr { u8 rmpp_version; u8 rmpp_type; u8 rmpp_rtime_flags; u8 rmpp_status; __be32 seg_num; __be32 paylen_newwin; }; typedef u64 __bitwise ib_sa_comp_mask; #define 
IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n))) /* * ib_sa_hdr and ib_sa_mad structures must be packed because they have * 64-bit fields that are only 32-bit aligned. 64-bit architectures will * lay them out wrong otherwise. (And unfortunately they are sent on * the wire so we can't change the layout) */ struct ib_sa_hdr { __be64 sm_key; __be16 attr_offset; __be16 reserved; ib_sa_comp_mask comp_mask; } __packed; struct ib_mad { struct ib_mad_hdr mad_hdr; u8 data[IB_MGMT_MAD_DATA]; }; struct opa_mad { struct ib_mad_hdr mad_hdr; u8 data[OPA_MGMT_MAD_DATA]; }; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[IB_MGMT_RMPP_DATA]; }; struct opa_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[OPA_MGMT_RMPP_DATA]; }; struct ib_sa_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; u8 data[IB_MGMT_SA_DATA]; } __packed; struct ib_vendor_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 reserved; u8 oui[3]; u8 data[IB_MGMT_VENDOR_DATA]; }; #define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) #define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F #define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5 struct ib_class_port_info { u8 base_version; u8 class_version; __be16 capability_mask; /* 27 bits for cap_mask2, 5 bits for resp_time */ __be32 cap_mask2_resp_time; u8 redirect_gid[16]; __be32 redirect_tcslfl; __be16 redirect_lid; __be16 redirect_pkey; __be32 redirect_qp; __be32 redirect_qkey; u8 trap_gid[16]; __be32 trap_tcslfl; __be16 trap_lid; __be16 trap_pkey; __be32 trap_hlqp; __be32 trap_qkey; }; /* PortInfo CapabilityMask */ enum ib_port_capability_mask_bits { IB_PORT_SM = 1 << 1, IB_PORT_NOTICE_SUP = 1 << 2, IB_PORT_TRAP_SUP = 1 << 3, IB_PORT_OPT_IPD_SUP = 1 << 4, IB_PORT_AUTO_MIGR_SUP = 1 << 5, IB_PORT_SL_MAP_SUP = 1 << 6, IB_PORT_MKEY_NVRAM = 1 << 7, IB_PORT_PKEY_NVRAM = 1 << 8, IB_PORT_LED_INFO_SUP = 1 << 9, IB_PORT_SM_DISABLED = 1 << 10, 
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IB_PORT_CAP_MASK2_SUP = 1 << 15, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, IB_PORT_DEVICE_MGMT_SUP = 1 << 19, IB_PORT_VENDOR_CLASS_SUP = 1 << 20, IB_PORT_DR_NOTICE_SUP = 1 << 21, IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, IB_PORT_CLIENT_REG_SUP = 1 << 25, IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26, IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27, IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28, IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29, IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30, IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31, }; enum ib_port_capability_mask2_bits { IB_PORT_SET_NODE_DESC_SUP = 1 << 0, IB_PORT_EX_PORT_INFO_EX_SUP = 1 << 1, IB_PORT_VIRT_SUP = 1 << 2, IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3, IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4, IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5, IB_PORT_LINK_SPEED_NDR_SUP = 1 << 10, }; #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) struct opa_class_port_info { u8 base_version; u8 class_version; __be16 cap_mask; __be32 cap_mask2_resp_time; u8 redirect_gid[16]; __be32 redirect_tc_fl; __be32 redirect_lid; __be32 redirect_sl_qp; __be32 redirect_qkey; u8 trap_gid[16]; __be32 trap_tc_fl; __be32 trap_lid; __be32 trap_hl_qp; __be32 trap_qkey; __be16 trap_pkey; __be16 redirect_pkey; u8 trap_sl_rsvd; u8 reserved[3]; } __packed; /** * ib_get_cpi_resp_time - Returns the resp_time value from * cap_mask2_resp_time in ib_class_port_info. * @cpi: A struct ib_class_port_info mad. */ static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi) { return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) & IB_CLASS_PORT_INFO_RESP_TIME_MASK); } /** * ib_set_cpi_resptime - Sets the response time in an * ib_class_port_info mad. * @cpi: A struct ib_class_port_info. * @rtime: The response time to set. 
*/ static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi, u8 rtime) { cpi->cap_mask2_resp_time = (cpi->cap_mask2_resp_time & cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK); } /** * ib_get_cpi_capmask2 - Returns the capmask2 value from * cap_mask2_resp_time in ib_class_port_info. * @cpi: A struct ib_class_port_info mad. */ static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi) { return (be32_to_cpu(cpi->cap_mask2_resp_time) >> IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } /** * ib_set_cpi_capmask2 - Sets the capmask2 in an * ib_class_port_info mad. * @cpi: A struct ib_class_port_info. * @capmask2: The capmask2 to set. */ static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi, u32 capmask2) { cpi->cap_mask2_resp_time = (cpi->cap_mask2_resp_time & cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | cpu_to_be32(capmask2 << IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } /** * opa_get_cpi_capmask2 - Returns the capmask2 value from * cap_mask2_resp_time in ib_class_port_info. * @cpi: A struct opa_class_port_info mad. 
*/ static inline u32 opa_get_cpi_capmask2(struct opa_class_port_info *cpi) { return (be32_to_cpu(cpi->cap_mask2_resp_time) >> IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } struct ib_mad_notice_attr { u8 generic_type; u8 prod_type_msb; __be16 prod_type_lsb; __be16 trap_num; __be16 issuer_lid; __be16 toggle_count; union { struct { u8 details[54]; } raw_data; struct { __be16 reserved; __be16 lid; /* where violation happened */ u8 port_num; /* where violation happened */ } __packed ntc_129_131; struct { __be16 reserved; __be16 lid; /* LID where change occurred */ u8 reserved2; u8 local_changes; /* low bit - local changes */ __be32 new_cap_mask; /* new capability mask */ u8 reserved3; u8 change_flags; /* low 3 bits only */ } __packed ntc_144; struct { __be16 reserved; __be16 lid; /* lid where sys guid changed */ __be16 reserved2; __be64 new_sys_guid; } __packed ntc_145; struct { __be16 reserved; __be16 lid; __be16 dr_slid; u8 method; u8 reserved2; __be16 attr_id; __be32 attr_mod; __be64 mkey; u8 reserved3; u8 dr_trunc_hop; u8 dr_rtn_path[30]; } __packed ntc_256; struct { __be16 reserved; __be16 lid1; __be16 lid2; __be32 key; __be32 sl_qp1; /* SL: high 4 bits */ __be32 qp2; /* high 8 bits reserved */ union ib_gid gid1; union ib_gid gid2; } __packed ntc_257_258; } details; }; /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. * @mad: References an allocated MAD data buffer for MADs that do not have * RMPP active. For MADs using RMPP, references the common and management * class specific headers. * @mad_agent: MAD agent that allocated the buffer. * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. * @hdr_len: Indicates the size of the data header of the MAD. This length * includes the common MAD, RMPP, and class specific headers. * @data_len: Indicates the total size of user-transferred data. 
* @seg_count: The number of RMPP segments allocated for this send. * @seg_size: Size of the data in each RMPP segment. This does not include * class specific headers. * @seg_rmpp_size: Size of each RMPP segment including the class specific * headers. * @timeout_ms: Time to wait for a response. * @retries: Number of times to retry a request for a response. For MADs * using RMPP, this applies per window. On completion, returns the number * of retries needed to complete the transfer. * * Users are responsible for initializing the MAD buffer itself, with the * exception of any RMPP header. Additional segment buffer space allocated * beyond data_len is padding. */ struct ib_mad_send_buf { struct ib_mad_send_buf *next; void *mad; struct ib_mad_agent *mad_agent; struct ib_ah *ah; void *context[2]; int hdr_len; int data_len; int seg_count; int seg_size; int seg_rmpp_size; int timeout_ms; int retries; }; /** * ib_response_mad - Returns if the specified MAD has been generated in * response to a sent request or trap. */ int ib_response_mad(const struct ib_mad_hdr *hdr); /** * ib_get_rmpp_resptime - Returns the RMPP response time. * @rmpp_hdr: An RMPP header. */ static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) { return rmpp_hdr->rmpp_rtime_flags >> 3; } /** * ib_get_rmpp_flags - Returns the RMPP flags. * @rmpp_hdr: An RMPP header. */ static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) { return rmpp_hdr->rmpp_rtime_flags & 0x7; } /** * ib_set_rmpp_resptime - Sets the response time in an RMPP header. * @rmpp_hdr: An RMPP header. * @rtime: The response time to set. */ static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) { rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); } /** * ib_set_rmpp_flags - Sets the flags in an RMPP header. * @rmpp_hdr: An RMPP header. * @flags: The flags to set. 
*/ static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) { rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) | (flags & 0x7); } struct ib_mad_agent; struct ib_mad_send_wc; struct ib_mad_recv_wc; /** * ib_mad_send_handler - callback handler for a sent MAD. * @mad_agent: MAD agent that sent the MAD. * @mad_send_wc: Send work completion information on the sent MAD. */ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc); /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. * @send_buf: Send buffer if found, else NULL * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to * the user before the send operation completes. All data buffers given * to registered agents through this routine are owned by the receiving * client. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc); /** * ib_mad_agent - Used to track MAD registration with the access layer. * @device: Reference to device registration is on. * @qp: Reference to QP used for sending and receiving MADs. * @mr: Memory region for system memory usable for DMA. * @recv_handler: Callback handler for a received MAD. * @send_handler: Callback handler for a sent MAD. * @context: User-specified context associated with this registration. * @hi_tid: Access layer assigned transaction ID for this client. * Unsolicited MADs sent by this client will have the upper 32-bits * of their TID set to this value. * @flags: registration flags * @port_num: Port number on which QP is registered * @rmpp_version: If set, indicates the RMPP version used by this agent. 
*/ enum { IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP, }; struct ib_mad_agent { struct ib_device *device; struct ib_qp *qp; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; void *context; u32 hi_tid; u32 flags; void *security; struct list_head mad_agent_sec_list; u8 port_num; u8 rmpp_version; bool smp_allowed; }; /** * ib_mad_send_wc - MAD send completion information. * @send_buf: Send MAD data buffer associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { struct ib_mad_send_buf *send_buf; enum ib_wc_status status; u32 vendor_err; }; /** * ib_mad_recv_buf - received MAD buffer information. * @list: Reference to next data buffer for a received RMPP MAD. * @grh: References a data buffer containing the global route header. * The data refereced by this buffer is only valid if the GRH is * valid. * @mad: References the start of the received MAD. */ struct ib_mad_recv_buf { struct list_head list; struct ib_grh *grh; union { struct ib_mad *mad; struct opa_mad *opa_mad; }; }; /** * ib_mad_recv_wc - received MAD information. * @wc: Completion information for the received data. * @recv_buf: Specifies the location of the received data buffer(s). * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * @mad_seg_size: The size of individual MAD segments * * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. */ struct ib_mad_recv_wc { struct ib_wc *wc; struct ib_mad_recv_buf recv_buf; struct list_head rmpp_list; int mad_len; size_t mad_seg_size; }; /** * ib_mad_reg_req - MAD registration request * @mgmt_class: Indicates which management class of MADs should be receive * by the caller. This field is only required if the user wishes to * receive unsolicited MADs, otherwise it should be 0. 
* @mgmt_class_version: Indicates which version of MADs for the given * management class to receive. * @oui: Indicates IEEE OUI when mgmt_class is a vendor class * in the range from 0x30 to 0x4f. Otherwise not used. * @method_mask: The caller will receive unsolicited MADs for any method * where @method_mask = 1. * */ struct ib_mad_reg_req { u8 mgmt_class; u8 mgmt_class_version; u8 oui[3]; DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); }; /** * ib_register_mad_agent - Register to send/receive MADs. * @device: The device to register with. * @port_num: The port on the specified device to use. * @qp_type: Specifies which QP to access. Must be either * IB_QPT_SMI or IB_QPT_GSI. * @mad_reg_req: Specifies which unsolicited MADs should be received * by the caller. This parameter may be NULL if the caller only * wishes to receive solicited responses. * @rmpp_version: If set, indicates that the client will send * and receive MADs that contain the RMPP header for the given version. * If set to 0, indicates that RMPP is not used by this client. * @send_handler: The completion callback routine invoked after a send * request has completed. * @recv_handler: The completion callback routine invoked for a received * MAD. * @context: User specified context associated with the registration. * @registration_flags: Registration flags to set for this agent */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u32 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, void *context, u32 registration_flags); /** * ib_unregister_mad_agent - Unregisters a client from using MAD services. * @mad_agent: Corresponding MAD registration request to deregister. * * After invoking this routine, MAD services are no longer usable by the * client on the associated QP. 
*/ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. * @send_buf: Specifies the information needed to send the MAD(s). * @bad_send_buf: Specifies the MAD on which an error was encountered. This * parameter is optional if only a single MAD is posted. * * Sent MADs are not guaranteed to complete in the order that they were posted. * * If the MAD requires RMPP, the data buffer should contain a single copy * of the common MAD, RMPP, and class specific headers, followed by the class * defined data. If the class defined data would not divide evenly into * RMPP segments, then space must be allocated at the end of the referenced * buffer for any required padding. To indicate the amount of class defined * data being transferred, the paylen_newwin field in the RMPP header should * be set to the size of the class specific header plus the amount of class * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, struct ib_mad_send_buf **bad_send_buf); /** * ib_free_recv_mad - Returns data buffers used to receive a MAD. * @mad_recv_wc: Work completion information for a received MAD. * * Clients receiving MADs through their ib_mad_recv_handler must call this * routine to return the work completion buffers to the access layer. */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @send_buf: Indicates the MAD to cancel. 
* * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf) { ib_modify_mad(send_buf, 0); } /** * ib_create_send_mad - Allocate and initialize a data buffer and work request * for sending a MAD. * @mad_agent: Specifies the registered MAD service to associate with the MAD. * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class * specific header. * @data_len: Indicates the size of any user-transferred data. The call will * automatically adjust the allocated buffer size to account for any * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * @base_version: Base Version of this MAD * * This routine allocates a MAD for sending. The returned MAD send buffer * will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned * MAD data buffer before posting the send. * * The returned MAD header, class specific headers, and any padding will be * cleared. Users are responsible for initializing the common MAD header, * any class specific header, and MAD data area. * If @rmpp_active is set, the RMPP header will be initialized for sending. */ struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask, u8 base_version); /** * ib_is_mad_class_rmpp - returns whether given management class * supports RMPP. * @mgmt_class: management class * * This routine returns whether the management class supports RMPP. 
*/ int ib_is_mad_class_rmpp(u8 mgmt_class); /** * ib_get_mad_data_offset - returns the data offset for a given * management class. * @mgmt_class: management class * * This routine returns the data offset in the MAD for the management * class requested. */ int ib_get_mad_data_offset(u8 mgmt_class); /** * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. * @send_buf: Previously allocated send data buffer. * @seg_num: number of segment to return * * This routine returns a pointer to the data buffer of an RMPP MAD. * Users must provide synchronization to @send_buf around this call. */ void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); /** * ib_free_send_mad - Returns data buffers used to send a MAD. * @send_buf: Previously allocated send data buffer. */ void ib_free_send_mad(struct ib_mad_send_buf *send_buf); /** * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP. * @agent: the agent in question * @return: true if agent is performing rmpp, false otherwise. */ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent); #endif /* IB_MAD_H */ PK ! �����M �M rdma/ib_sa.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. 
*/ #ifndef IB_SA_H #define IB_SA_H #include <linux/completion.h> #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/netdevice.h> #include <rdma/ib_verbs.h> #include <rdma/ib_mad.h> #include <rdma/ib_addr.h> #include <rdma/opa_addr.h> enum { IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ IB_SA_METHOD_GET_TABLE = 0x12, IB_SA_METHOD_GET_TABLE_RESP = 0x92, IB_SA_METHOD_DELETE = 0x15, IB_SA_METHOD_DELETE_RESP = 0x95, IB_SA_METHOD_GET_MULTI = 0x14, IB_SA_METHOD_GET_MULTI_RESP = 0x94, IB_SA_METHOD_GET_TRACE_TBL = 0x13 }; #define OPA_SA_CLASS_VERSION 0x80 enum { IB_SA_ATTR_CLASS_PORTINFO = 0x01, IB_SA_ATTR_NOTICE = 0x02, IB_SA_ATTR_INFORM_INFO = 0x03, IB_SA_ATTR_NODE_REC = 0x11, IB_SA_ATTR_PORT_INFO_REC = 0x12, IB_SA_ATTR_SL2VL_REC = 0x13, IB_SA_ATTR_SWITCH_REC = 0x14, IB_SA_ATTR_LINEAR_FDB_REC = 0x15, IB_SA_ATTR_RANDOM_FDB_REC = 0x16, IB_SA_ATTR_MCAST_FDB_REC = 0x17, IB_SA_ATTR_SM_INFO_REC = 0x18, IB_SA_ATTR_LINK_REC = 0x20, IB_SA_ATTR_GUID_INFO_REC = 0x30, IB_SA_ATTR_SERVICE_REC = 0x31, IB_SA_ATTR_PARTITION_REC = 0x33, IB_SA_ATTR_PATH_REC = 0x35, IB_SA_ATTR_VL_ARB_REC = 0x36, IB_SA_ATTR_MC_MEMBER_REC = 0x38, IB_SA_ATTR_TRACE_REC = 0x39, IB_SA_ATTR_MULTI_PATH_REC = 0x3a, IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b, IB_SA_ATTR_INFORM_INFO_REC = 0xf3 }; enum ib_sa_selector { IB_SA_GT = 0, IB_SA_LT = 1, IB_SA_EQ = 2, /* * The meaning of "best" depends on the attribute: for * example, for MTU best will return the largest available * MTU, while for packet life time, best will return the * smallest available life time. */ IB_SA_BEST = 3 }; /* * There are 4 types of join states: * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. * The order corresponds to JoinState bits in MCMemberRecord. 
*/ enum ib_sa_mc_join_states { FULLMEMBER_JOIN, NONMEMBER_JOIN, SENDONLY_NONMEBER_JOIN, SENDONLY_FULLMEMBER_JOIN, NUM_JOIN_MEMBERSHIP_TYPES, }; #define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) /* * Structures for SA records are named "struct ib_sa_xxx_rec." No * attempt is made to pack structures to match the physical layout of * SA records in SA MADs; all packing and unpacking is handled by the * SA query code. * * For a record with structure ib_sa_xxx_rec, the naming convention * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we * never use different abbreviations or otherwise change the spelling * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). * * Reserved rows are indicated with comments to help maintainability. */ #define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) |\ IB_SA_COMP_MASK( 1)) #define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) #define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) #define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) #define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) #define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) /* reserved: 7 */ #define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) #define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) #define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) #define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) #define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) #define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) #define IB_SA_PATH_REC_QOS_CLASS IB_SA_COMP_MASK(14) #define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) #define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) #define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) #define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) #define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) #define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) #define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) #define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) enum sa_path_rec_type { SA_PATH_REC_TYPE_IB, SA_PATH_REC_TYPE_ROCE_V1, 
SA_PATH_REC_TYPE_ROCE_V2, SA_PATH_REC_TYPE_OPA }; struct sa_path_rec_ib { __be16 dlid; __be16 slid; u8 raw_traffic; }; /** * struct sa_path_rec_roce - RoCE specific portion of the path record entry * @route_resolved: When set, it indicates that this route is already * resolved for this path record entry. * @dmac: Destination mac address for the given DGID entry * of the path record entry. */ struct sa_path_rec_roce { bool route_resolved; u8 dmac[ETH_ALEN]; }; struct sa_path_rec_opa { __be32 dlid; __be32 slid; u8 raw_traffic; u8 l2_8B; u8 l2_10B; u8 l2_9B; u8 l2_16B; u8 qos_type; u8 qos_priority; }; struct sa_path_rec { union ib_gid dgid; union ib_gid sgid; __be64 service_id; /* reserved */ __be32 flow_label; u8 hop_limit; u8 traffic_class; u8 reversible; u8 numb_path; __be16 pkey; __be16 qos_class; u8 sl; u8 mtu_selector; u8 mtu; u8 rate_selector; u8 rate; u8 packet_life_time_selector; u8 packet_life_time; u8 preference; union { struct sa_path_rec_ib ib; struct sa_path_rec_roce roce; struct sa_path_rec_opa opa; }; enum sa_path_rec_type rec_type; }; static inline enum ib_gid_type sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec) { switch (rec->rec_type) { case SA_PATH_REC_TYPE_ROCE_V1: return IB_GID_TYPE_ROCE; case SA_PATH_REC_TYPE_ROCE_V2: return IB_GID_TYPE_ROCE_UDP_ENCAP; default: return IB_GID_TYPE_IB; } } static inline enum sa_path_rec_type sa_conv_gid_to_pathrec_type(enum ib_gid_type type) { switch (type) { case IB_GID_TYPE_ROCE: return SA_PATH_REC_TYPE_ROCE_V1; case IB_GID_TYPE_ROCE_UDP_ENCAP: return SA_PATH_REC_TYPE_ROCE_V2; default: return SA_PATH_REC_TYPE_IB; } } static inline void path_conv_opa_to_ib(struct sa_path_rec *ib, struct sa_path_rec *opa) { if ((be32_to_cpu(opa->opa.dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(opa->opa.slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) { /* Create OPA GID and zero out the LID */ ib->dgid.global.interface_id = OPA_MAKE_ID(be32_to_cpu(opa->opa.dlid)); ib->dgid.global.subnet_prefix = 
opa->dgid.global.subnet_prefix; ib->sgid.global.interface_id = OPA_MAKE_ID(be32_to_cpu(opa->opa.slid)); ib->dgid.global.subnet_prefix = opa->dgid.global.subnet_prefix; ib->ib.dlid = 0; ib->ib.slid = 0; } else { ib->ib.dlid = htons(ntohl(opa->opa.dlid)); ib->ib.slid = htons(ntohl(opa->opa.slid)); } ib->service_id = opa->service_id; ib->ib.raw_traffic = opa->opa.raw_traffic; } static inline void path_conv_ib_to_opa(struct sa_path_rec *opa, struct sa_path_rec *ib) { __be32 slid, dlid; if ((ib_is_opa_gid(&ib->sgid)) || (ib_is_opa_gid(&ib->dgid))) { slid = htonl(opa_get_lid_from_gid(&ib->sgid)); dlid = htonl(opa_get_lid_from_gid(&ib->dgid)); } else { slid = htonl(ntohs(ib->ib.slid)); dlid = htonl(ntohs(ib->ib.dlid)); } opa->opa.slid = slid; opa->opa.dlid = dlid; opa->service_id = ib->service_id; opa->opa.raw_traffic = ib->ib.raw_traffic; } /* Convert from OPA to IB path record */ static inline void sa_convert_path_opa_to_ib(struct sa_path_rec *dest, struct sa_path_rec *src) { if (src->rec_type != SA_PATH_REC_TYPE_OPA) return; *dest = *src; dest->rec_type = SA_PATH_REC_TYPE_IB; path_conv_opa_to_ib(dest, src); } /* Convert from IB to OPA path record */ static inline void sa_convert_path_ib_to_opa(struct sa_path_rec *dest, struct sa_path_rec *src) { if (src->rec_type != SA_PATH_REC_TYPE_IB) return; /* Do a structure copy and overwrite the relevant fields */ *dest = *src; dest->rec_type = SA_PATH_REC_TYPE_OPA; path_conv_ib_to_opa(dest, src); } #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) #define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) #define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) #define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) #define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) #define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) #define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) #define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) #define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) #define IB_SA_MCMEMBER_REC_RATE 
IB_SA_COMP_MASK( 9) #define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) #define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) #define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) #define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) #define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) #define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) #define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) #define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) struct ib_sa_mcmember_rec { union ib_gid mgid; union ib_gid port_gid; __be32 qkey; __be16 mlid; u8 mtu_selector; u8 mtu; u8 traffic_class; __be16 pkey; u8 rate_selector; u8 rate; u8 packet_life_time_selector; u8 packet_life_time; u8 sl; __be32 flow_label; u8 hop_limit; u8 scope; u8 join_state; u8 proxy_join; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ #define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) #define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) #define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) /* reserved: 3 */ #define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) #define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) #define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) #define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) #define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) #define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9) #define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) #define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) #define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) #define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) #define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) #define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) #define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16) #define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) #define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) #define 
IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) #define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) #define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) #define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) #define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) #define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) #define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) #define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) #define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) #define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) #define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) #define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) #define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) #define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) #define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) #define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) #define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) #define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) #define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF #define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0) #define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1) #define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2) #define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3) #define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4) #define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5) #define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6) #define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7) #define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8) #define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9) #define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10) #define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11) struct ib_sa_guidinfo_rec { __be16 lid; u8 block_num; /* reserved */ u8 res1; __be32 res2; u8 guid_info_list[64]; }; struct ib_sa_client { atomic_t users; struct completion comp; }; /** * ib_sa_register_client - 
Register an SA client. */ void ib_sa_register_client(struct ib_sa_client *client); /** * ib_sa_unregister_client - Deregister an SA client. * @client: Client object to deregister. */ void ib_sa_unregister_client(struct ib_sa_client *client); struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, void *context), void *context, struct ib_sa_query **query); struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; ib_sa_comp_mask comp_mask; int (*callback)(int status, struct ib_sa_multicast *multicast); void *context; }; /** * ib_sa_join_multicast - Initiates a join request to the specified multicast * group. * @client: SA client * @device: Device associated with the multicast group. * @port_num: Port on the specified device to associate with the multicast * group. * @rec: SA multicast member record specifying group attributes. * @comp_mask: Component mask indicating which group attributes of %rec are * valid. * @gfp_mask: GFP mask for memory allocations. * @callback: User callback invoked once the join operation completes. * @context: User specified context stored with the ib_sa_multicast structure. * * This call initiates a multicast join request with the SA for the specified * multicast group. If the join operation is started successfully, it returns * an ib_sa_multicast structure that is used to track the multicast operation. * Users must free this structure by calling ib_free_multicast, even if the * join operation later fails. (The callback status is non-zero.) * * If the join operation fails; status will be non-zero, with the following * failures possible: * -ETIMEDOUT: The request timed out. * -EIO: An error occurred sending the query. 
* -EINVAL: The MCMemberRecord values differed from the existing group's. * -ENETRESET: Indicates that an fatal error has occurred on the multicast * group, and the user must rejoin the group to continue using it. */ struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, struct ib_sa_multicast *multicast), void *context); /** * ib_free_multicast - Frees the multicast tracking structure, and releases * any reference on the multicast group. * @multicast: Multicast tracking structure allocated by ib_join_multicast. * * This call blocks until the multicast identifier is destroyed. It may * not be called from within the multicast callback; however, returning a non- * zero value from the callback will result in destroying the multicast * tracking structure. */ void ib_sa_free_multicast(struct ib_sa_multicast *multicast); /** * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and * returns it if found. * @device: Device associated with the multicast group. * @port_num: Port on the specified device to associate with the multicast * group. * @mgid: MGID of multicast group. * @rec: Location to copy SA multicast member record. */ int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec); /** * ib_init_ah_from_mcmember - Initialize address handle attributes based on * an SA multicast member record. */ int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr); int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *sgid_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec * to IB MAD wire format. 
*/ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute); /** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. */ void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec); /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query); static inline bool sa_path_is_roce(struct sa_path_rec *rec) { return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } static inline bool sa_path_is_opa(struct sa_path_rec *rec) { return (rec->rec_type == SA_PATH_REC_TYPE_OPA); } static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) rec->ib.slid = cpu_to_be16(slid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) rec->opa.slid = cpu_to_be32(slid); } static inline void sa_path_set_dlid(struct sa_path_rec *rec, u32 dlid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) rec->ib.dlid = cpu_to_be16(dlid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) rec->opa.dlid = cpu_to_be32(dlid); } static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, u8 raw_traffic) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) rec->ib.raw_traffic = raw_traffic; else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) rec->opa.raw_traffic = raw_traffic; } static inline __be32 sa_path_get_slid(struct sa_path_rec *rec) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) return htonl(ntohs(rec->ib.slid)); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) return rec->opa.slid; return 0; } static inline __be32 sa_path_get_dlid(struct sa_path_rec *rec) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) return htonl(ntohs(rec->ib.dlid)); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) 
return rec->opa.dlid; return 0; } static inline u8 sa_path_get_raw_traffic(struct sa_path_rec *rec) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) return rec->ib.raw_traffic; else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) return rec->opa.raw_traffic; return 0; } static inline void sa_path_set_dmac(struct sa_path_rec *rec, u8 *dmac) { if (sa_path_is_roce(rec)) memcpy(rec->roce.dmac, dmac, ETH_ALEN); } static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec) { if (sa_path_is_roce(rec)) eth_zero_addr(rec->roce.dmac); } static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec) { if (sa_path_is_roce(rec)) return rec->roce.dmac; return NULL; } #endif /* IB_SA_H */ PK ! �G�J rdma/rdma_cm_ib.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2006 Intel Corporation. All rights reserved. */ #ifndef RDMA_CM_IB_H #define RDMA_CM_IB_H #include <rdma/rdma_cm.h> /** * rdma_set_ib_path - Manually sets the path record used to establish a * connection. * @id: Connection identifier associated with the request. * @path_rec: Reference to the path record * * This call permits a user to specify routing information for rdma_cm_id's * bound to InfiniBand devices. It is called on the client side of a * connection and replaces the call to rdma_resolve_route. */ int rdma_set_ib_path(struct rdma_cm_id *id, struct sa_path_rec *path_rec); /* Global qkey for UDP QPs and multicast groups. */ #define RDMA_UDP_QKEY 0x01234567 #endif /* RDMA_CM_IB_H */ PK ! ��� � rdma/rw.hnu �[��� /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016 HGST, a Western Digital Company. 
*/ #ifndef _RDMA_RW_H #define _RDMA_RW_H #include <linux/dma-mapping.h> #include <linux/scatterlist.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <rdma/mr_pool.h> struct rdma_rw_ctx { /* number of RDMA READ/WRITE WRs (not counting MR WRs) */ u32 nr_ops; /* tag for the union below: */ u8 type; union { /* for mapping a single SGE: */ struct { struct ib_sge sge; struct ib_rdma_wr wr; } single; /* for mapping of multiple SGEs: */ struct { struct ib_sge *sges; struct ib_rdma_wr *wrs; } map; /* for registering multiple WRs: */ struct rdma_rw_reg_ctx { struct ib_sge sge; struct ib_rdma_wr wr; struct ib_reg_wr reg_wr; struct ib_send_wr inv_wr; struct ib_mr *mr; } *reg; }; }; int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir); void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir); int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey, enum dma_data_direction dir); void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, enum dma_data_direction dir); struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num, unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr 
*attr); void rdma_rw_cleanup_mrs(struct ib_qp *qp); #endif /* _RDMA_RW_H */ PK ! �3��� � rdma/ib_cache.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. */ #ifndef _IB_CACHE_H #define _IB_CACHE_H #include <rdma/ib_verbs.h> int rdma_query_gid(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr); const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev); const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, u32 port, struct net_device *ndev); const struct ib_gid_attr *rdma_find_gid_by_filter( struct ib_device *device, const union ib_gid *gid, u32 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context); int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, u16 *vlan_id, u8 *smac); struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. * @port_num: The port number of the device to query. * @index: The index into the cached PKey table to query. * @pkey: The PKey value found at the specified index. * * ib_get_cached_pkey() fetches the specified PKey table entry stored in * the local software cache. */ int ib_get_cached_pkey(struct ib_device *device_handle, u32 port_num, int index, u16 *pkey); /** * ib_find_cached_pkey - Returns the PKey table index where a specified * PKey value occurs. * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. 
* @index: The index into the cached PKey table where the PKey was found. * * ib_find_cached_pkey() searches the specified PKey table in * the local software cache. */ int ib_find_cached_pkey(struct ib_device *device, u32 port_num, u16 pkey, u16 *index); /** * ib_find_exact_cached_pkey - Returns the PKey table index where a specified * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. * @index: The index into the cached PKey table where the PKey was found. * * ib_find_exact_cached_pkey() searches the specified PKey table in * the local software cache. */ int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num, u16 pkey, u16 *index); /** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. * @port_num: The port number of the device to query. * @lmc: The lmc value for the specified port for that device. * * ib_get_cached_lmc() fetches the specified lmc table entry stored in * the local software cache. */ int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc); /** * ib_get_cached_port_state - Returns a cached port state table entry * @device: The device to query. * @port_num: The port number of the device to query. * @port_state: port_state for the specified port for that device. * * ib_get_cached_port_state() fetches the specified port_state table entry stored in * the local software cache. 
*/ int ib_get_cached_port_state(struct ib_device *device, u32 port_num, enum ib_port_state *port_active); bool rdma_is_zero_gid(const union ib_gid *gid); const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index); void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); ssize_t rdma_query_gid_table(struct ib_device *device, struct ib_uverbs_gid_entry *entries, size_t max_entries); #endif /* _IB_CACHE_H */ PK ! V�{xA3 A3 rdma/rdma_cm.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. */ #ifndef RDMA_CM_H #define RDMA_CM_H #include <linux/socket.h> #include <linux/in6.h> #include <rdma/ib_addr.h> #include <rdma/ib_sa.h> #include <uapi/rdma/rdma_user_cm.h> /* * Upon receiving a device removal event, users must destroy the associated * RDMA identifier and release all resources allocated with the device. 
*/ enum rdma_cm_event_type { RDMA_CM_EVENT_ADDR_RESOLVED, RDMA_CM_EVENT_ADDR_ERROR, RDMA_CM_EVENT_ROUTE_RESOLVED, RDMA_CM_EVENT_ROUTE_ERROR, RDMA_CM_EVENT_CONNECT_REQUEST, RDMA_CM_EVENT_CONNECT_RESPONSE, RDMA_CM_EVENT_CONNECT_ERROR, RDMA_CM_EVENT_UNREACHABLE, RDMA_CM_EVENT_REJECTED, RDMA_CM_EVENT_ESTABLISHED, RDMA_CM_EVENT_DISCONNECTED, RDMA_CM_EVENT_DEVICE_REMOVAL, RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, RDMA_CM_EVENT_TIMEWAIT_EXIT }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); #define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL #define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL #define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL #define RDMA_IB_IP_PS_IB 0x00000000013F0000ULL struct rdma_addr { struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr dev_addr; }; struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; int num_paths; }; struct rdma_conn_param { const void *private_data; u8 private_data_len; u8 responder_resources; u8 initiator_depth; u8 flow_control; u8 retry_count; /* ignored when accepting */ u8 rnr_retry_count; /* Fields below ignored if a QP is created on the rdma_cm_id. */ u8 srq; u32 qp_num; u32 qkey; }; struct rdma_ud_param { const void *private_data; u8 private_data_len; struct rdma_ah_attr ah_attr; u32 qp_num; u32 qkey; }; struct rdma_cm_event { enum rdma_cm_event_type event; int status; union { struct rdma_conn_param conn; struct rdma_ud_param ud; } param; struct rdma_ucm_ece ece; }; struct rdma_cm_id; /** * rdma_cm_event_handler - Callback used to report user events. * * Notes: Users may not call rdma_destroy_id from this callback to destroy * the passed in id, or a corresponding listen id. Returning a * non-zero value from the callback will destroy the passed in id. 
*/ typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id, struct rdma_cm_event *event); struct rdma_cm_id { struct ib_device *device; void *context; struct ib_qp *qp; rdma_cm_event_handler event_handler; struct rdma_route route; enum rdma_ucm_port_space ps; enum ib_qp_type qp_type; u32 port_num; }; struct rdma_cm_id * __rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, void *context, enum rdma_ucm_port_space ps, enum ib_qp_type qp_type, const char *caller); struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, void *context, enum rdma_ucm_port_space ps, enum ib_qp_type qp_type); /** * rdma_create_id - Create an RDMA identifier. * * @net: The network namespace in which to create the new id. * @event_handler: User callback invoked to report events associated with the * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. * @qp_type: type of queue pair associated with the id. * * Returns a new rdma_cm_id. The id holds a reference on the network * namespace until it is destroyed. * * The event handler callback serializes on the id's mutex and is * allowed to sleep. */ #define rdma_create_id(net, event_handler, context, ps, qp_type) \ __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \ KBUILD_MODNAME) /** * rdma_destroy_id - Destroys an RDMA identifier. * * @id: RDMA identifier. * * Note: calling this function has the effect of canceling in-flight * asynchronous operations associated with the id. */ void rdma_destroy_id(struct rdma_cm_id *id); /** * rdma_bind_addr - Bind an RDMA identifier to a source address and * associated RDMA device, if needed. * * @id: RDMA identifier. * @addr: Local address information. Wildcard values are permitted. * * This associates a source address with the RDMA identifier before calling * rdma_listen. If a specific local address is given, the RDMA identifier will * be bound to a local RDMA device. 
*/ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); /** * rdma_resolve_addr - Resolve destination and optional source addresses * from IP addresses to an RDMA address. If successful, the specified * rdma_cm_id will be bound to a local device. * * @id: RDMA identifier. * @src_addr: Source address information. This parameter may be NULL. * @dst_addr: Destination address information. * @timeout_ms: Time to wait for resolution to complete. */ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, const struct sockaddr *dst_addr, unsigned long timeout_ms); /** * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier * into route information needed to establish a connection. * * This is called on the client side of a connection. * Users must have first called rdma_resolve_addr to resolve a dst_addr * into an RDMA address before calling this routine. */ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. * * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA * through their states. */ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); /** * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA * identifier. * * Users must destroy any QP associated with an RDMA identifier before * destroying the RDMA ID. */ void rdma_destroy_qp(struct rdma_cm_id *id); /** * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. * @id: Communication identifier associated with the QP attributes to * initialize. * @qp_attr: On input, specifies the desired QP state. On output, the * mandatory and desired optional attributes will be set in order to * modify the QP to the specified state. * @qp_attr_mask: The QP attribute mask that may be used to transition the * QP to the specified state. 
* * Users must set the @qp_attr->qp_state to the desired QP state. This call * will set all required attributes for the given transition, along with * known optional attributes. Users may override the attributes returned from * this call before calling ib_modify_qp. * * Users that wish to have their QP automatically transitioned through its * states can associate a QP with the rdma_cm_id by calling rdma_create_qp(). */ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); int rdma_connect_locked(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, struct rdma_ucm_ece *ece); /** * rdma_listen - This function is called by the passive side to * listen for incoming connection requests. * * Users must have bound the rdma_cm_id to a local address by calling * rdma_bind_addr before calling this routine. */ int rdma_listen(struct rdma_cm_id *id, int backlog); int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); void rdma_lock_handler(struct rdma_cm_id *id); void rdma_unlock_handler(struct rdma_cm_id *id); int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, struct rdma_ucm_ece *ece); /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has * occurred on the connection. * @id: Connection identifier to transition to established. * @event: Asynchronous event. * * This routine should be invoked by users to notify the CM of relevant * communication events. Events that should be reported to the CM and * when to report them are: * * IB_EVENT_COMM_EST - Used when a message is received on a connected * QP before an RTU has been received. */ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event); /** * rdma_reject - Called to reject a connection request or response. 
*/ int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len, u8 reason); /** * rdma_disconnect - This function disconnects the associated QP and * transitions it into the error state. */ int rdma_disconnect(struct rdma_cm_id *id); /** * rdma_join_multicast - Join the multicast group specified by the given * address. * @id: Communication identifier associated with the request. * @addr: Multicast address identifying the group to join. * @join_state: Multicast JoinState bitmap requested by port. * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits. * @context: User-defined context associated with the join request, returned * to the user through the private_data pointer in multicast events. */ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context); /** * rdma_leave_multicast - Leave the multicast group specified by the given * address. */ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); /** * rdma_set_service_type - Set the type of service associated with a * connection identifier. * @id: Communication identifier to associated with service type. * @tos: Type of service. * * The type of service is interpretted as a differentiated service * field (RFC 2474). The service type should be specified before * performing route resolution, as existing communication on the * connection identifier may be unaffected. The type of service * requested may not be supported by the network to all destinations. */ void rdma_set_service_type(struct rdma_cm_id *id, int tos); /** * rdma_set_reuseaddr - Allow the reuse of local addresses when binding * the rdma_cm_id. * @id: Communication identifier to configure. * @reuse: Value indicating if the bound address is reusable. * * Reuse must be set before an address is bound to the id. */ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); /** * rdma_set_afonly - Specify that listens are restricted to the * bound address family only. 
* @id: Communication identifer to configure. * @afonly: Value indicating if listens are restricted. * * Must be set before identifier is in the listening state. */ int rdma_set_afonly(struct rdma_cm_id *id, int afonly); int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout); int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer); /** * rdma_get_service_id - Return the IB service ID for a specified address. * @id: Communication identifier associated with the address. * @addr: Address for the service ID. */ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr); /** * rdma_reject_msg - return a pointer to a reject message string. * @id: Communication identifier that received the REJECT event. * @reason: Value returned in the REJECT event status field. */ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, int reason); /** * rdma_consumer_reject_data - return the consumer reject private data and * length, if any. * @id: Communication identifier that received the REJECT event. * @ev: RDMA CM reject event. * @data_len: Pointer to the resulting length of the consumer data. */ const void *rdma_consumer_reject_data(struct rdma_cm_id *id, struct rdma_cm_event *ev, u8 *data_len); /** * rdma_read_gids - Return the SGID and DGID used for establishing * connection. This can be used after rdma_resolve_addr() * on client side. This can be use on new connection * on server side. This is applicable to IB, RoCE, iWarp. * If cm_id is not bound yet to the RDMA device, it doesn't * copy and SGID or DGID to the given pointers. * @id: Communication identifier whose GIDs are queried. * @sgid: Pointer to SGID where SGID will be returned. It is optional. * @dgid: Pointer to DGID where DGID will be returned. It is optional. * Note: This API should not be used by any new ULPs or new code. * Instead, users interested in querying GIDs should refer to path record * of the rdma_cm_id to query the GIDs. 
* This API is provided for compatibility for existing users. */ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, union ib_gid *dgid); struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id); struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res); #endif /* RDMA_CM_H */ PK ! �6��8 8 rdma/mr_pool.hnu �[��� /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2016 HGST, a Western Digital Company. */ #ifndef _RDMA_MR_POOL_H #define _RDMA_MR_POOL_H 1 #include <rdma/ib_verbs.h> struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list); void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg); void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list); #endif /* _RDMA_MR_POOL_H */ PK ! �OT� � rdma/opa_addr.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2017 Intel Corporation. */ #ifndef OPA_ADDR_H #define OPA_ADDR_H #include <rdma/opa_smi.h> #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) #define OPA_GID_INDEX 0x1 /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range * Example: For 24 bit LID space, * Multicast range: 0xF00000 to 0xF7FFFF * Collective range: 0xF80000 to 0xFFFFFE */ #define OPA_MCAST_NR 0x4 /* Number of top bits set */ #define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that * the provided gid is a special purpose GID meant to carry * extended LID information. 
* * @gid: The Global identifier */ static inline bool ib_is_opa_gid(const union ib_gid *gid) { return ((be64_to_cpu(gid->global.interface_id) >> 40) == OPA_SPECIAL_OUI); } /** * opa_get_lid_from_gid: Returns the last 32 bits of the gid. * OPA devices use one of the gids in the gid table to also * store the lid. * * @gid: The Global identifier */ static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } /** * opa_is_extended_lid: Returns true if dlid or slid are * extended. * * @dlid: The DLID * @slid: The SLID */ static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) { if ((be32_to_cpu(dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) return true; return false; } /* Get multicast lid base */ static inline u32 opa_get_mcast_base(u32 nr_top_bits) { return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); } /* Check for a valid unicast LID for non-SM traffic types */ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) return false; } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= opa_get_mcast_base(OPA_MCAST_NR)) return false; } return true; } #endif /* OPA_ADDR_H */ PK ! ]�� � rdma/ib_sysfs.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2021 Mellanox Technologies Ltd. All rights reserved. 
*/ #ifndef DEF_RDMA_IB_SYSFS_H #define DEF_RDMA_IB_SYSFS_H #include <linux/sysfs.h> struct ib_device; struct ib_port_attribute { struct attribute attr; ssize_t (*show)(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, char *buf); ssize_t (*store)(struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr, const char *buf, size_t count); }; #define IB_PORT_ATTR_RW(_name) \ struct ib_port_attribute ib_port_attr_##_name = __ATTR_RW(_name) #define IB_PORT_ATTR_ADMIN_RW(_name) \ struct ib_port_attribute ib_port_attr_##_name = \ __ATTR_RW_MODE(_name, 0600) #define IB_PORT_ATTR_RO(_name) \ struct ib_port_attribute ib_port_attr_##_name = __ATTR_RO(_name) #define IB_PORT_ATTR_WO(_name) \ struct ib_port_attribute ib_port_attr_##_name = __ATTR_WO(_name) struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj, u32 *port_num); #endif PK ! �FO� � rdma/uverbs_std_types.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ #ifndef _UVERBS_STD_TYPES__ #define _UVERBS_STD_TYPES__ #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> /* Returns _id, or causes a compile error if _id is not a u32. * * The uobj APIs should only be used with the write based uAPI to access * object IDs. The write API must use a u32 for the object handle, which is * checked by this macro. 
*/ #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id)) #define uobj_get_type(_attrs, _object) \ uapi_get_object((_attrs)->ufile->device->uapi, _object) #define uobj_get_read(_type, _id, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_READ, \ _attrs) #define ufd_get_read(_type, _fdnum, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ UVERBS_LOOKUP_READ, _attrs) static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { if (IS_ERR(uobj)) return ERR_CAST(uobj); return uobj->object; } #define uobj_get_obj_read(_object, _type, _id, _attrs) \ ((struct ib_##_object *)_uobj_get_obj_read( \ uobj_get_read(_type, _id, _attrs))) #define uobj_get_write(_type, _id, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \ _attrs) int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, struct uverbs_attr_bundle *attrs); #define uobj_perform_destroy(_type, _id, _attrs) \ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ _uobj_check_id(_id), _attrs) struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, u32 id, struct uverbs_attr_bundle *attrs); #define uobj_get_destroy(_type, _id, _attrs) \ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ _attrs) static inline void uobj_put_destroy(struct ib_uobject *uobj) { rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); } static inline void uobj_put_read(struct ib_uobject *uobj) { rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ); } #define uobj_put_obj_read(_obj) \ uobj_put_read((_obj)->uobject) static inline void uobj_put_write(struct ib_uobject *uobj) { rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } static inline void uobj_alloc_abort(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { rdma_alloc_abort_uobject(uobj, attrs, false); } static 
inline void uobj_finalize_uobj_create(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { /* * Tell the core code that the write() handler has completed * initializing the object and that the core should commit or * abort this object based upon the return code from the write() * method. Similar to what uverbs_finalize_uobj_create() does for * ioctl() */ WARN_ON(attrs->uobject); attrs->uobject = uobj; } static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs); if (!IS_ERR(uobj)) *ib_dev = attrs->context->device; return uobj; } #define uobj_alloc(_type, _attrs, _ib_dev) \ __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev) static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, struct ib_uobject *uobj, struct ib_device *ib_dev, enum ib_flow_action_type type) { atomic_set(&action->usecnt, 0); action->device = ib_dev; action->type = type; action->uobject = uobj; uobj->object = action; } struct ib_uflow_resources { size_t max; size_t num; size_t collection_num; size_t counters_num; struct ib_counters **counters; struct ib_flow_action **collection; }; struct ib_uflow_object { struct ib_uobject uobject; struct ib_uflow_resources *resources; }; struct ib_uflow_resources *flow_resources_alloc(size_t num_specs); void flow_resources_add(struct ib_uflow_resources *uflow_res, enum ib_flow_spec_type type, void *ibobj); void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, struct ib_qp *qp, struct ib_device *device, struct ib_uflow_resources *uflow_res) { struct ib_uflow_object *uflow; uobj->object = ibflow; ibflow->uobject = uobj; if (qp) { atomic_inc(&qp->usecnt); ibflow->qp = qp; } ibflow->device = device; uflow = container_of(uobj, typeof(*uflow), uobject); uflow->resources = uflow_res; 
} struct uverbs_api_object { const struct uverbs_obj_type *type_attrs; const struct uverbs_obj_type_class *type_class; u8 disabled:1; u32 id; }; static inline u32 uobj_get_object_id(struct ib_uobject *uobj) { return uobj->uapi_object->id; } #endif PK ! ���� � rdma/ib_pma.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. */ #ifndef IB_PMA_H #define IB_PMA_H #include <rdma/ib_mad.h> /* * PMA class portinfo capability mask bits */ #define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) #define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) #define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10) #define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) #define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) #define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010) #define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011) #define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012) #define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D) #define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E) struct ib_pma_mad { struct ib_mad_hdr mad_hdr; u8 reserved[40]; u8 data[192]; } __packed; struct ib_pma_portsamplescontrol { u8 opcode; u8 port_select; u8 tick; u8 counter_width; /* resv: 7:3, counter width: 2:0 */ __be32 counter_mask0_9; /* 2, 10 3-bit fields */ __be16 counter_mask10_14; /* 1, 5 3-bit fields */ u8 sample_mechanisms; u8 sample_status; /* only lower 2 bits */ __be64 option_mask; __be64 vendor_mask; __be32 sample_start; __be32 sample_interval; __be16 tag; __be16 counter_select[15]; __be32 reserved1; __be64 samples_only_option_mask; __be32 reserved2[28]; }; struct ib_pma_portsamplesresult { __be16 tag; __be16 sample_status; /* only lower 2 bits */ __be32 counter[15]; }; struct ib_pma_portsamplesresult_ext { __be16 tag; __be16 sample_status; /* only lower 2 bits */ __be32 extended_width; /* only upper 2 bits */ 
__be64 counter[15]; }; struct ib_pma_portcounters { u8 reserved; u8 port_select; __be16 counter_select; __be16 symbol_error_counter; u8 link_error_recovery_counter; u8 link_downed_counter; __be16 port_rcv_errors; __be16 port_rcv_remphys_errors; __be16 port_rcv_switch_relay_errors; __be16 port_xmit_discards; u8 port_xmit_constraint_errors; u8 port_rcv_constraint_errors; u8 reserved1; u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */ __be16 reserved2; __be16 vl15_dropped; __be32 port_xmit_data; __be32 port_rcv_data; __be32 port_xmit_packets; __be32 port_rcv_packets; __be32 port_xmit_wait; } __packed; #define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001) #define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002) #define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004) #define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008) #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010) #define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040) #define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200) #define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400) #define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800) #define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000) #define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000) #define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000) #define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000) struct ib_pma_portcounters_ext { u8 reserved; u8 port_select; __be16 counter_select; __be32 reserved1; __be64 port_xmit_data; __be64 port_rcv_data; __be64 port_xmit_packets; __be64 port_rcv_packets; __be64 port_unicast_xmit_packets; __be64 port_unicast_rcv_packets; __be64 port_multicast_xmit_packets; __be64 port_multicast_rcv_packets; } __packed; #define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001) #define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002) #define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004) #define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008) #define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS 
cpu_to_be16(0x0010) #define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020) #define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040) #define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080) #endif /* IB_PMA_H */ PK ! 7,d rdma/ib_marshall.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. */ #ifndef IB_USER_MARSHALL_H #define IB_USER_MARSHALL_H #include <rdma/ib_verbs.h> #include <rdma/ib_sa.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_user_sa.h> void ib_copy_qp_attr_to_user(struct ib_device *device, struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); void ib_copy_ah_attr_to_user(struct ib_device *device, struct ib_uverbs_ah_attr *dst, struct rdma_ah_attr *src); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, struct sa_path_rec *src); void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src); #endif /* IB_USER_MARSHALL_H */ PK ! ���� � rdma/opa_vnic.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2017 - 2020 Intel Corporation. */ #ifndef _OPA_VNIC_H #define _OPA_VNIC_H /* * This file contains Intel Omni-Path (OPA) Virtual Network Interface * Controller (VNIC) specific declarations. 
*/ #include <rdma/ib_verbs.h> /* 16 header bytes + 2 reserved bytes */ #define OPA_VNIC_L2_HDR_LEN (16 + 2) #define OPA_VNIC_L4_HDR_LEN 2 #define OPA_VNIC_HDR_LEN (OPA_VNIC_L2_HDR_LEN + \ OPA_VNIC_L4_HDR_LEN) #define OPA_VNIC_L4_ETHR 0x78 #define OPA_VNIC_ICRC_LEN 4 #define OPA_VNIC_TAIL_LEN 1 #define OPA_VNIC_ICRC_TAIL_LEN (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN) #define OPA_VNIC_SKB_MDATA_LEN 4 #define OPA_VNIC_SKB_MDATA_ENCAP_ERR 0x1 /* opa vnic rdma netdev's private data structure */ struct opa_vnic_rdma_netdev { struct rdma_netdev rn; /* keep this first */ /* followed by device private data */ char *dev_priv[]; }; static inline void *opa_vnic_priv(const struct net_device *dev) { struct rdma_netdev *rn = netdev_priv(dev); return rn->clnt_priv; } static inline void *opa_vnic_dev_priv(const struct net_device *dev) { struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev); return oparn->dev_priv; } /* opa_vnic skb meta data structrue */ struct opa_vnic_skb_mdata { u8 vl; u8 entropy; u8 flags; u8 rsvd; } __packed; /* OPA VNIC group statistics */ struct opa_vnic_grp_stats { u64 unicast; u64 mcastbcast; u64 untagged; u64 vlan; u64 s_64; u64 s_65_127; u64 s_128_255; u64 s_256_511; u64 s_512_1023; u64 s_1024_1518; u64 s_1519_max; }; struct opa_vnic_stats { /* standard netdev statistics */ struct rtnl_link_stats64 netstats; /* OPA VNIC statistics */ struct opa_vnic_grp_stats tx_grp; struct opa_vnic_grp_stats rx_grp; u64 tx_dlid_zero; u64 tx_drop_state; u64 rx_drop_state; u64 rx_runt; u64 rx_oversize; }; static inline bool rdma_cap_opa_vnic(struct ib_device *device) { return !!(device->attrs.device_cap_flags & IB_DEVICE_RDMA_NETDEV_OPA); } #endif /* _OPA_VNIC_H */ PK ! 
�,��� � rdma/rdma_netlink.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDMA_NETLINK_H #define _RDMA_NETLINK_H #include <linux/netlink.h> #include <uapi/rdma/rdma_netlink.h> enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32, }; struct rdma_nl_cbs { int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); u8 flags; }; enum rdma_nl_flags { /* Require CAP_NET_ADMIN */ RDMA_NL_ADMIN_PERM = 1 << 0, }; /* Define this module as providing netlink services for NETLINK_RDMA, with * index _index. Since the client indexes were setup in a uapi header as an * enum and we do no want to change that, the user must supply the expanded * constant as well and the compiler checks they are the same. */ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ MODULE_ALIAS("rdma-netlink-subsys-" __stringify(_val)) /** * Register client in RDMA netlink. * @index: Index of the added client * @cb_table: A table for op->callback */ void rdma_nl_register(unsigned int index, const struct rdma_nl_cbs cb_table[]); /** * Remove a client from IB netlink. * @index: Index of the removed IB client. */ void rdma_nl_unregister(unsigned int index); /** * Put a new message in a supplied skb. * @skb: The netlink skb. * @nlh: Pointer to put the header of the new netlink message. * @seq: The message sequence number. * @len: The requested message length to allocate. * @client: Calling IB netlink client. * @op: message content op. * Returns the allocated buffer on success and NULL on failure. */ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int len, int client, int op, int flags); /** * Put a new attribute in a supplied skb. * @skb: The netlink skb. * @nlh: Header of the netlink message to append the attribute to. 
* @len: The length of the attribute data. * @data: The attribute data to put. * @type: The attribute type. * Returns the 0 and a negative error code on failure. */ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int len, void *data, int type); /** * Send the supplied skb to a specific userspace PID. * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid); /** * Send, with wait/1 retry, the supplied skb to a specific userspace PID. * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. * @net: Net namespace in which to send the skb * @skb: The netlink skb * @group: Netlink group ID * @flags: allocation flags * Returns 0 on success or a negative error code. */ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, unsigned int group, gfp_t flags); /** * Check if there are any listeners to the netlink group * @group: the netlink group ID * Returns true on success or false if no listeners. */ bool rdma_nl_chk_listeners(unsigned int group); struct rdma_link_ops { struct list_head list; const char *type; int (*newlink)(const char *ibdev_name, struct net_device *ndev); }; void rdma_link_register(struct rdma_link_ops *ops); void rdma_link_unregister(struct rdma_link_ops *ops); #define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type) #define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type) #endif /* _RDMA_NETLINK_H */ PK ! Н>B� � rdma/uverbs_ioctl.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. 
*/ #ifndef _UVERBS_IOCTL_ #define _UVERBS_IOCTL_ #include <rdma/uverbs_types.h> #include <linux/uaccess.h> #include <rdma/rdma_user_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> #include <rdma/ib_user_ioctl_cmds.h> /* * ======================================= * Verbs action specifications * ======================================= */ enum uverbs_attr_type { UVERBS_ATTR_TYPE_NA, UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, UVERBS_ATTR_TYPE_ENUM_IN, UVERBS_ATTR_TYPE_IDRS_ARRAY, }; enum uverbs_obj_access { UVERBS_ACCESS_READ, UVERBS_ACCESS_WRITE, UVERBS_ACCESS_NEW, UVERBS_ACCESS_DESTROY }; /* Specification of a single attribute inside the ioctl message */ /* good size 16 */ struct uverbs_attr_spec { u8 type; /* * Support extending attributes by length. Allow the user to provide * more bytes than ptr.len, but check that everything after is zero'd * by the user. */ u8 zero_trailing:1; /* * Valid only for PTR_IN. Allocate and copy the data inside * the parser */ u8 alloc_and_copy:1; u8 mandatory:1; /* True if this is from UVERBS_ATTR_UHW */ u8 is_udata:1; union { struct { /* Current known size to kernel */ u16 len; /* User isn't allowed to provide something < min_len */ u16 min_len; } ptr; struct { /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. */ u16 obj_type; u8 access; } obj; struct { u8 num_elems; } enum_def; } u; /* This weird split lets us remove some padding */ union { struct { /* * The enum attribute can select one of the attributes * contained in the ids array. Currently only PTR_IN * attributes are supported in the ids array. */ const struct uverbs_attr_spec *ids; } enum_def; struct { /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. */ u16 obj_type; u16 min_len; u16 max_len; u8 access; } objs_arr; } u2; }; /* * Information about the API is loaded into a radix tree. 
For IOCTL we start * with a tuple of: * object_id, attr_id, method_id * * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based * on the current kernel support this is compressed into 16 bit key for the * radix tree. Since this compression is entirely internal to the kernel the * below limits can be revised if the kernel gains additional data. * * With 64 leafs per node this is a 3 level radix tree. * * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot. * * This also encodes the tables for the write() and write() extended commands * using the coding * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command # * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex # * ie the WRITE path is treated as a special method type in the ioctl * framework. */ enum uapi_radix_data { UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT, UVERBS_API_ATTR_KEY_BITS = 6, UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0), UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1, UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS, UVERBS_API_METHOD_KEY_BITS = 5, UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS, UVERBS_API_METHOD_KEY_NUM_CORE = 22, UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_METHOD_KEY_NUM_DRIVER = (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) - UVERBS_API_METHOD_KEY_NUM_CORE, UVERBS_API_METHOD_KEY_MASK = GENMASK( UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1, UVERBS_API_METHOD_KEY_SHIFT), UVERBS_API_OBJ_KEY_BITS = 5, UVERBS_API_OBJ_KEY_SHIFT = UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_OBJ_KEY_NUM_CORE = 20, UVERBS_API_OBJ_KEY_NUM_DRIVER = (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE, UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT), /* This id guaranteed to not 
exist in the radix tree */ UVERBS_API_KEY_ERR = 0xFFFFFFFF, }; static inline __attribute_const__ u32 uapi_key_obj(u32 id) { if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER) return UVERBS_API_KEY_ERR; id = id + UVERBS_API_OBJ_KEY_NUM_CORE; } else { if (id >= UVERBS_API_OBJ_KEY_NUM_CORE) return UVERBS_API_KEY_ERR; } return id << UVERBS_API_OBJ_KEY_SHIFT; } static inline __attribute_const__ bool uapi_key_is_object(u32 key) { return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0; } static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id) { if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER) return UVERBS_API_KEY_ERR; id = id + UVERBS_API_METHOD_KEY_NUM_CORE; } else { id++; if (id >= UVERBS_API_METHOD_KEY_NUM_CORE) return UVERBS_API_KEY_ERR; } return id << UVERBS_API_METHOD_KEY_SHIFT; } static inline __attribute_const__ u32 uapi_key_write_method(u32 id) { if (id >= UVERBS_API_WRITE_KEY_NUM) return UVERBS_API_KEY_ERR; return UVERBS_API_METHOD_IS_WRITE | id; } static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id) { if (id >= UVERBS_API_WRITE_KEY_NUM) return UVERBS_API_KEY_ERR; return UVERBS_API_METHOD_IS_WRITE_EX | id; } static inline __attribute_const__ u32 uapi_key_attr_to_ioctl_method(u32 attr_key) { return attr_key & (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK); } static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key) { unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) == 0; } static inline __attribute_const__ bool uapi_key_is_write_method(u32 key) { return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE; } static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key) { return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE_EX; } static inline __attribute_const__ u32 
uapi_key_attrs_start(u32 ioctl_method_key) { /* 0 is the method slot itself */ return ioctl_method_key + 1; } static inline __attribute_const__ u32 uapi_key_attr(u32 id) { /* * The attr is designed to fit in the typical single radix tree node * of 64 entries. Since allmost all methods have driver attributes we * organize things so that the driver and core attributes interleave to * reduce the length of the attributes array in typical cases. */ if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; id++; if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) return UVERBS_API_KEY_ERR; id = (id << 1) | 0; } else { if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) return UVERBS_API_KEY_ERR; id = (id << 1) | 1; } return id; } /* Only true for ioctl methods */ static inline __attribute_const__ bool uapi_key_is_attr(u32 key) { unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) != 0; } /* * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN), * basically it undoes the reservation of 0 in the ID numbering. attr_key * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of * uapi_key_attr(). 
*/ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key) { return attr_key - 1; } static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey) { return attr_bkey + 1; } /* * ======================================= * Verbs definitions * ======================================= */ struct uverbs_attr_def { u16 id; struct uverbs_attr_spec attr; }; struct uverbs_method_def { u16 id; /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ u32 flags; size_t num_attrs; const struct uverbs_attr_def * const (*attrs)[]; int (*handler)(struct uverbs_attr_bundle *attrs); }; struct uverbs_object_def { u16 id; const struct uverbs_obj_type *type_attrs; size_t num_methods; const struct uverbs_method_def * const (*methods)[]; }; enum uapi_definition_kind { UAPI_DEF_END = 0, UAPI_DEF_OBJECT_START, UAPI_DEF_WRITE, UAPI_DEF_CHAIN_OBJ_TREE, UAPI_DEF_CHAIN, UAPI_DEF_IS_SUPPORTED_FUNC, UAPI_DEF_IS_SUPPORTED_DEV_FN, }; enum uapi_definition_scope { UAPI_SCOPE_OBJECT = 1, UAPI_SCOPE_METHOD = 2, }; struct uapi_definition { u8 kind; u8 scope; union { struct { u16 object_id; } object_start; struct { u16 command_num; u8 is_ex:1; u8 has_udata:1; u8 has_resp:1; u8 req_size; u8 resp_size; } write; }; union { bool (*func_is_supported)(struct ib_device *device); int (*func_write)(struct uverbs_attr_bundle *attrs); const struct uapi_definition *chain; const struct uverbs_object_def *chain_obj_tree; size_t needs_fn_offset; }; }; /* Define things connected to object_id */ #define DECLARE_UVERBS_OBJECT(_object_id, ...) \ { \ .kind = UAPI_DEF_OBJECT_START, \ .object_start = { .object_id = _object_id }, \ }, \ ##__VA_ARGS__ /* Use in a var_args of DECLARE_UVERBS_OBJECT */ #define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) 
\ { \ .kind = UAPI_DEF_WRITE, \ .scope = UAPI_SCOPE_OBJECT, \ .write = { .is_ex = 0, .command_num = _command_num }, \ .func_write = _func, \ _cmd_desc, \ }, \ ##__VA_ARGS__ /* Use in a var_args of DECLARE_UVERBS_OBJECT */ #define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \ { \ .kind = UAPI_DEF_WRITE, \ .scope = UAPI_SCOPE_OBJECT, \ .write = { .is_ex = 1, .command_num = _command_num }, \ .func_write = _func, \ _cmd_desc, \ }, \ ##__VA_ARGS__ /* * Object is only supported if the function pointer named ibdev_fn in struct * ib_device is not NULL. */ #define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ .scope = UAPI_SCOPE_OBJECT, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ ibdev_fn) != \ sizeof(void *)), \ } /* * Method is only supported if the function pointer named ibdev_fn in struct * ib_device is not NULL. */ #define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ .scope = UAPI_SCOPE_METHOD, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ ibdev_fn) != \ sizeof(void *)), \ } /* Call a function to determine if the entire object is supported or not */ #define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \ } /* Include another struct uapi_definition in this one */ #define UAPI_DEF_CHAIN(_def_var) \ { \ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \ } /* Temporary until the tree base description is replaced */ #define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \ { \ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \ .object_start = { .object_id = _object_enum }, \ .chain_obj_tree = _object_ptr, \ }, \ ##__VA_ARGS__ #define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) 
\ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \ ##__VA_ARGS__) /* * ======================================= * Attribute Specifications * ======================================= */ #define UVERBS_ATTR_SIZE(_min_len, _len) \ .u.ptr.min_len = _min_len, .u.ptr.len = _len #define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0) /* * Specifies a uapi structure that cannot be extended. The user must always * supply the whole structure and nothing more. The structure must be declared * in a header under include/uapi/rdma. */ #define UVERBS_ATTR_TYPE(_type) \ .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) /* * Specifies a uapi structure where the user must provide at least up to * member 'last'. Anything after last and up until the end of the structure * can be non-zero, anything longer than the end of the structure must be * zero. The structure must be declared in a header under include/uapi/rdma. */ #define UVERBS_ATTR_STRUCT(_type, _last) \ .zero_trailing = 1, \ UVERBS_ATTR_SIZE(offsetofend(_type, _last), sizeof(_type)) /* * Specifies at least min_len bytes must be passed in, but the amount can be * larger, up to the protocol maximum size. No check for zeroing is done. */ #define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) /* Must be used in the '...' of any UVERBS_ATTR */ #define UA_ALLOC_AND_COPY .alloc_and_copy = 1 #define UA_MANDATORY .mandatory = 1 #define UA_OPTIONAL .mandatory = 0 /* * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only * READ\WRITE accesses are supported. */ #define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \ ...) 
\ (&(const struct uverbs_attr_def){ \ .id = (_attr_id) + \ BUILD_BUG_ON_ZERO((_min_len) == 0 || \ (_max_len) > \ PAGE_SIZE / sizeof(void *) || \ (_min_len) > (_max_len) || \ (_access) == UVERBS_ACCESS_NEW || \ (_access) == UVERBS_ACCESS_DESTROY), \ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \ .u2.objs_arr.obj_type = _idr_type, \ .u2.objs_arr.access = _access, \ .u2.objs_arr.min_len = _min_len, \ .u2.objs_arr.max_len = _max_len, \ __VA_ARGS__ } }) /* * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted, * the user must validate the type of the uobject instead. */ #define UVERBS_IDR_ANY_OBJECT 0xFFFF #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ .u.obj.obj_type = _idr_type, \ .u.obj.access = _access, \ __VA_ARGS__ } }) #define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = (_attr_id) + \ BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ (_access) != UVERBS_ACCESS_READ), \ .attr = { .type = UVERBS_ATTR_TYPE_FD, \ .u.obj.obj_type = _fd_type, \ .u.obj.access = _access, \ __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ _type, \ __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ _type, \ __VA_ARGS__ } }) /* _enum_arry should be a 'static const union uverbs_attr_spec[]' */ #define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ .u2.enum_def.ids = _enum_arr, \ .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ __VA_ARGS__ }, \ }) /* An input value that is a member in the enum _enum_type. */ #define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) 
\ UVERBS_ATTR_PTR_IN( \ _attr_id, \ UVERBS_ATTR_SIZE( \ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \ sizeof(u64)), \ __VA_ARGS__) /* * An input value that is a bitwise combination of values of _enum_type. * This permits the flag value to be passed as either a u32 or u64, it must * be retrieved via uverbs_get_flag(). */ #define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) \ UVERBS_ATTR_PTR_IN( \ _attr_id, \ UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \ !sizeof(_enum_type *)), \ sizeof(u64)), \ __VA_ARGS__) /* * This spec is used in order to pass information to the hardware driver in a * legacy way. Every verb that could get driver specific data should get this * spec. */ #define UVERBS_ATTR_UHW() \ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ UVERBS_ATTR_MIN_SIZE(0), \ UA_OPTIONAL, \ .is_udata = 1), \ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ UVERBS_ATTR_MIN_SIZE(0), \ UA_OPTIONAL, \ .is_udata = 1) /* ================================================= * Parsing infrastructure * ================================================= */ struct uverbs_ptr_attr { /* * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is * used. 
*/ union { void *ptr; u64 data; }; u16 len; u16 uattr_idx; u8 enum_id; }; struct uverbs_obj_attr { struct ib_uobject *uobject; const struct uverbs_api_attr *attr_elm; }; struct uverbs_objs_arr_attr { struct ib_uobject **uobjects; u16 len; }; struct uverbs_attr { union { struct uverbs_ptr_attr ptr_attr; struct uverbs_obj_attr obj_attr; struct uverbs_objs_arr_attr objs_arr_attr; }; }; struct uverbs_attr_bundle { struct ib_udata driver_udata; struct ib_udata ucore; struct ib_uverbs_file *ufile; struct ib_ucontext *context; struct ib_uobject *uobject; DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); struct uverbs_attr attrs[]; }; static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, unsigned int idx) { return test_bit(uapi_bkey_attr(uapi_key_attr(idx)), attrs_bundle->attr_present); } /** * rdma_udata_to_drv_context - Helper macro to get the driver's context out of * ib_udata which is embedded in uverbs_attr_bundle. * * If udata is not NULL this cannot fail. Otherwise a NULL udata will result * in a NULL ucontext pointer, as a safety precaution. Callers should be using * 'udata' to determine if the driver call is in user or kernel mode, not * 'ucontext'. * */ static inline struct uverbs_attr_bundle * rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) { return container_of(udata, struct uverbs_attr_bundle, driver_udata); } #define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ (udata ? 
container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { if (!uverbs_attr_is_valid(attrs_bundle, idx)) return ERR_PTR(-ENOENT); return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))]; } static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); return attr->ptr_attr.enum_id; } static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr; attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return ERR_CAST(attr); return attr->obj_attr.uobject->object; } static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return ERR_CAST(attr); return attr->obj_attr.uobject; } static inline int uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); return attr->ptr_attr.len; } void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *attrs_bundle, u16 idx); /* * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr * attribute. 
* @attrs: The attribute bundle * @idx: The ID of the attribute * @elem_size: The size of the element in the array */ static inline int uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx, size_t elem_size) { int size = uverbs_attr_get_len(attrs, idx); if (size < 0) return size; if (size % elem_size) return -EINVAL; return size / elem_size; } /** * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for * UVERBS_ATTR_TYPE_IDRS_ARRAY. * @arr: Returned pointer to array of pointers for uobjects or NULL if * the attribute isn't provided. * * Return: The array length or 0 if no attribute was provided. */ static inline int uverbs_attr_get_uobjs_arr( const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx, struct ib_uobject ***arr) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, attr_idx); if (IS_ERR(attr)) { *arr = NULL; return 0; } *arr = attr->objs_arr_attr.uobjects; return attr->objs_arr_attr.len; } static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) { return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); } static inline void *uverbs_attr_get_alloced_ptr( const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return (void *)attr; return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data : attr->ptr_attr.ptr; } static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); /* * Validation ensures attr->ptr_attr.len >= size. If the caller is * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call * uverbs_copy_from_or_zero. 
*/ if (unlikely(size < attr->ptr_attr.len)) return -EINVAL; if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), attr->ptr_attr.len)) return -EFAULT; return 0; } static inline int _uverbs_copy_from_or_zero(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); size_t min_size; if (IS_ERR(attr)) return PTR_ERR(attr); min_size = min_t(size_t, size, attr->ptr_attr.len); if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, min_size); else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), min_size)) return -EFAULT; if (size > min_size) memset(to + min_size, 0, size - min_size); return 0; } #define uverbs_copy_from(to, attrs_bundle, idx) \ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) #define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) static inline struct ib_ucontext * ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs) { return ib_uverbs_get_ucontext_file(attrs->ufile); } #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size); __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, gfp_t flags); static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size) { return _uverbs_alloc(bundle, size, GFP_KERNEL); } static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, size_t size) { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } static inline __malloc void 
*uverbs_kcalloc(struct uverbs_attr_bundle *bundle, size_t n, size_t size) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return ERR_PTR(-EOVERFLOW); return uverbs_zalloc(bundle, bytes); } int _uverbs_get_const_signed(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); int _uverbs_get_const_unsigned(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 upper_bound, u64 *def_val); int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size); #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits) { return -EINVAL; } static inline int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits) { return -EINVAL; } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size) { return -EINVAL; } static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size) { return ERR_PTR(-EINVAL); } static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, size_t size) { return ERR_PTR(-EINVAL); } static inline int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) { return -EINVAL; } static inline int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size) { return -EINVAL; } static inline int _uverbs_get_const_signed(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) { return -EINVAL; } static inline int _uverbs_get_const_unsigned(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 upper_bound, u64 *def_val) { return -EINVAL; } #endif #define uverbs_get_const_signed(_to, 
_attrs_bundle, _idx) \ ({ \ s64 _val; \ int _ret = \ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ type_min(typeof(*(_to))), \ type_max(typeof(*(_to))), NULL); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_unsigned(_to, _attrs_bundle, _idx) \ ({ \ u64 _val; \ int _ret = \ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ type_max(typeof(*(_to))), NULL); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, _default) \ ({ \ s64 _val; \ s64 _def_val = _default; \ int _ret = \ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ type_min(typeof(*(_to))), \ type_max(typeof(*(_to))), &_def_val); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, _default) \ ({ \ u64 _val; \ u64 _def_val = _default; \ int _ret = \ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ type_max(typeof(*(_to))), &_def_val); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const(_to, _attrs_bundle, _idx) \ (is_signed_type(typeof(*(_to))) ? \ uverbs_get_const_signed(_to, _attrs_bundle, _idx) : \ uverbs_get_const_unsigned(_to, _attrs_bundle, _idx)) \ #define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ (is_signed_type(typeof(*(_to))) ? \ uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, \ _default) : \ uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \ _default)) #endif PK ! �3AtZ Z rdma/ib.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2010 Intel Corporation. All rights reserved. 
*/ #ifndef _RDMA_IB_H #define _RDMA_IB_H #include <linux/types.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/uaccess.h> #include <linux/fs.h> struct ib_addr { union { __u8 uib_addr8[16]; __be16 uib_addr16[8]; __be32 uib_addr32[4]; __be64 uib_addr64[2]; } ib_u; #define sib_addr8 ib_u.uib_addr8 #define sib_addr16 ib_u.uib_addr16 #define sib_addr32 ib_u.uib_addr32 #define sib_addr64 ib_u.uib_addr64 #define sib_raw ib_u.uib_addr8 #define sib_subnet_prefix ib_u.uib_addr64[0] #define sib_interface_id ib_u.uib_addr64[1] }; static inline bool ib_addr_any(const struct ib_addr *a) { return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); } static inline bool ib_addr_loopback(const struct ib_addr *a) { return ((a->sib_addr32[0] | a->sib_addr32[1] | a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); } static inline void ib_addr_set(struct ib_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { addr->sib_addr32[0] = w1; addr->sib_addr32[1] = w2; addr->sib_addr32[2] = w3; addr->sib_addr32[3] = w4; } static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2) { return memcmp(a1, a2, sizeof(struct ib_addr)); } struct sockaddr_ib { unsigned short int sib_family; /* AF_IB */ __be16 sib_pkey; __be32 sib_flowinfo; struct ib_addr sib_addr; __be64 sib_sid; __be64 sib_sid_mask; __u64 sib_scope_id; }; /* * The IB interfaces that use write() as bi-directional ioctl() are * fundamentally unsafe, since there are lots of ways to trigger "write()" * calls from various contexts with elevated privileges. That includes the * traditional suid executable error message writes, but also various kernel * interfaces that can write to file descriptors. * * This function provides protection for the legacy API by restricting the * calling context. */ static inline bool ib_safe_file_access(struct file *filp) { return filp->f_cred == current_cred() && !uaccess_kernel(); } #endif /* _RDMA_IB_H */ PK ! 
9�� rdma/opa_smi.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Intel Corporation. All rights reserved. */ #ifndef OPA_SMI_H #define OPA_SMI_H #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> #define OPA_SMP_LID_DATA_SIZE 2016 #define OPA_SMP_DR_DATA_SIZE 1872 #define OPA_SMP_MAX_PATH_HOPS 64 #define OPA_MAX_VLS 32 #define OPA_MAX_SLS 32 #define OPA_MAX_SCS 32 #define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF) struct opa_smp { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; u8 hop_ptr; u8 hop_cnt; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; __be64 mkey; union { struct { uint8_t data[OPA_SMP_LID_DATA_SIZE]; } lid; struct { __be32 dr_slid; __be32 dr_dlid; u8 initial_path[OPA_SMP_MAX_PATH_HOPS]; u8 return_path[OPA_SMP_MAX_PATH_HOPS]; u8 reserved[8]; u8 data[OPA_SMP_DR_DATA_SIZE]; } dr; } route; } __packed; /* Subnet management attributes */ /* ... */ #define OPA_ATTRIB_ID_NODE_DESCRIPTION cpu_to_be16(0x0010) #define OPA_ATTRIB_ID_NODE_INFO cpu_to_be16(0x0011) #define OPA_ATTRIB_ID_PORT_INFO cpu_to_be16(0x0015) #define OPA_ATTRIB_ID_PARTITION_TABLE cpu_to_be16(0x0016) #define OPA_ATTRIB_ID_SL_TO_SC_MAP cpu_to_be16(0x0017) #define OPA_ATTRIB_ID_VL_ARBITRATION cpu_to_be16(0x0018) #define OPA_ATTRIB_ID_SM_INFO cpu_to_be16(0x0020) #define OPA_ATTRIB_ID_CABLE_INFO cpu_to_be16(0x0032) #define OPA_ATTRIB_ID_AGGREGATE cpu_to_be16(0x0080) #define OPA_ATTRIB_ID_SC_TO_SL_MAP cpu_to_be16(0x0082) #define OPA_ATTRIB_ID_SC_TO_VLR_MAP cpu_to_be16(0x0083) #define OPA_ATTRIB_ID_SC_TO_VLT_MAP cpu_to_be16(0x0084) #define OPA_ATTRIB_ID_SC_TO_VLNT_MAP cpu_to_be16(0x0085) /* ... */ #define OPA_ATTRIB_ID_PORT_STATE_INFO cpu_to_be16(0x0087) /* ... */ #define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE cpu_to_be16(0x008A) /* ... 
*/ struct opa_node_description { u8 data[64]; } __packed; struct opa_node_info { u8 base_version; u8 class_version; u8 node_type; u8 num_ports; __be32 reserved; __be64 system_image_guid; __be64 node_guid; __be64 port_guid; __be16 partition_cap; __be16 device_id; __be32 revision; u8 local_port_num; u8 vendor_id[3]; /* network byte order */ } __packed; #define OPA_PARTITION_TABLE_BLK_SIZE 32 static inline u8 opa_get_smp_direction(struct opa_smp *smp) { return ib_get_smp_direction((struct ib_smp *)smp); } static inline u8 *opa_get_smp_data(struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return smp->route.dr.data; return smp->route.lid.data; } static inline size_t opa_get_smp_data_size(struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return sizeof(smp->route.dr.data); return sizeof(smp->route.lid.data); } static inline size_t opa_get_smp_header_size(struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return sizeof(*smp) - sizeof(smp->route.dr.data); return sizeof(*smp) - sizeof(smp->route.lid.data); } #endif /* OPA_SMI_H */ PK ! ,|�� � rdma/rdmavt_cq.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2018 Intel Corporation. */ #ifndef DEF_RDMAVT_INCCQ_H #define DEF_RDMAVT_INCCQ_H #include <linux/kthread.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_verbs.h> /* * Define an ib_cq_notify value that is not valid so we know when CQ * notifications are armed. */ #define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) /* * Define read macro that apply smp_load_acquire memory barrier * when reading indice of circular buffer that mmaped to user space. */ #define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val) /* * Define write macro that uses smp_store_release memory barrier * when writing indice of circular buffer that mmaped to user space. 
*/
#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x)
#include <rdma/rvt-abi.h>

/*
 * This structure is used to contain the head pointer, tail pointer,
 * and completion queue entries as a single memory allocation so
 * it can be mmap'ed into user space.
 */
struct rvt_k_cq_wc {
	u32 head;               /* index of next entry to fill */
	u32 tail;               /* index of next ib_poll_cq() entry */
	struct ib_wc kqueue[];  /* flexible array of completion entries */
};

/*
 * The completion queue structure.
 */
struct rvt_cq {
	struct ib_cq ibcq;
	struct work_struct comptask;
	spinlock_t lock; /* protect changes in this struct */
	u8 notify;
	u8 triggered;
	u8 cq_full;
	int comp_vector_cpu;
	struct rvt_dev_info *rdi;
	struct rvt_cq_wc *queue;
	struct rvt_mmap_info *ip;
	struct rvt_k_cq_wc *kqueue;
};

/* Maps an embedded ib_cq back to the rvt_cq that contains it. */
static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct rvt_cq, ibcq);
}

bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);

#endif          /* DEF_RDMAVT_INCCQ_H */
PK ! ��� � rdma/ib_umem.hnu �[���
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2007 Cisco Systems.  All rights reserved.
 * Copyright (c) 2020 Intel Corporation.  All rights reserved.
*/

#ifndef IB_UMEM_H
#define IB_UMEM_H

#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>

struct ib_ucontext;
struct ib_umem_odp;
struct dma_buf_attach_ops;

/* Descriptor for a user memory region; see the helpers in this header. */
struct ib_umem {
	struct ib_device       *ibdev;
	struct mm_struct       *owning_mm;
	u64 iova;
	size_t			length;
	unsigned long		address;
	u32 writable : 1;
	u32 is_odp : 1;
	u32 is_dmabuf : 1;
	/* Placing at the end of the bitfield list is ABI preserving on LE */
	u32 is_peer : 1;
	struct work_struct	work;
	struct sg_append_table sgt_append;
};

/* dma-buf flavour of a umem; embeds the generic ib_umem as its first member. */
struct ib_umem_dmabuf {
	struct ib_umem umem;
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;
	struct scatterlist *first_sg;
	struct scatterlist *last_sg;
	unsigned long first_sg_offset;
	unsigned long last_sg_trim;
	void *private;
};

/* Maps an embedded ib_umem back to the ib_umem_dmabuf that contains it. */
static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem)
{
	return container_of(umem, struct ib_umem_dmabuf, umem);
}

/* Callback type taken by ib_umem_activate_invalidation_notifier(). */
typedef void (*umem_invalidate_func_t)(struct ib_umem *umem, void *priv);

/* Bit flags for the peer_mem_flags argument of ib_umem_get_peer(). */
enum ib_peer_mem_flags {
	IB_PEER_MEM_ALLOW = 1 << 0,
	IB_PEER_MEM_INVAL_SUPP = 1 << 1,
};

/* Returns the offset of the umem start relative to the first page.
*/ static inline int ib_umem_offset(struct ib_umem *umem) { return umem->address & ~PAGE_MASK; } static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, unsigned long pgsz) { return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) & (pgsz - 1); } static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { return (size_t)((ALIGN(umem->iova + umem->length, pgsz) - ALIGN_DOWN(umem->iova, pgsz))) / pgsz; } static inline size_t ib_umem_num_pages(struct ib_umem *umem) { return ib_umem_num_dma_blocks(umem, PAGE_SIZE); } static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, struct ib_umem *umem, unsigned long pgsz) { __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, umem->sgt_append.sgt.nents, pgsz); biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); } static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) { return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; } /** * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem * @umem: umem to iterate over * @pgsz: Page size to split the list into * * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The * returned DMA blocks will be aligned to pgsz and span the range: * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz) * * Performs exactly ib_umem_num_dma_blocks() iterations. 
*/
#define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
	     __rdma_umem_block_iter_next(biter);)

#ifdef CONFIG_INFINIBAND_USER_MEM

struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
			    size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
		      size_t length);
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
				     unsigned long pgsz_bitmap,
				     unsigned long virt);

/**
 * ib_umem_find_best_pgoff - Find best HW page size
 *
 * @umem: umem struct
 * @pgsz_bitmap: bitmap of HW supported page sizes
 * @pgoff_bitmask: Mask of bits that can be represented with an offset
 *
 * This is very similar to ib_umem_find_best_pgsz() except instead of accepting
 * an IOVA it accepts a bitmask specifying what address bits can be represented
 * with a page offset.
 *
 * For instance if the HW has multiple page sizes, requires 64 byte alignment,
 * and can support aligned offsets up to 4032 then pgoff_bitmask would be
 * "111111000000".
 *
 * If the pgoff_bitmask requires either alignment in the low bit or an
 * unavailable page size for the high bits, this function returns 0.
*/ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, unsigned long pgsz_bitmap, u64 pgoff_bitmask) { struct scatterlist *sg = umem->sgt_append.sgt.sgl; dma_addr_t dma_addr; dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); return ib_umem_find_best_pgsz(umem, pgsz_bitmap, dma_addr & pgoff_bitmask); } struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, unsigned long offset, size_t size, int fd, int access, const struct dma_buf_attach_ops *ops); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); struct ib_umem *ib_umem_get_peer(struct ib_device *device, unsigned long addr, size_t size, int access, unsigned long peer_mem_flags); void ib_umem_activate_invalidation_notifier(struct ib_umem *umem, umem_invalidate_func_t func, void *cookie); void ib_umem_stop_invalidation_notifier(struct ib_umem *umem); #else /* CONFIG_INFINIBAND_USER_MEM */ #include <linux/err.h> static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { return ERR_PTR(-EOPNOTSUPP); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { return -EOPNOTSUPP; } static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt) { return 0; } static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, unsigned long pgsz_bitmap, u64 pgoff_bitmask) { return 0; } static inline struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, unsigned long offset, size_t size, int fd, int access, struct dma_buf_attach_ops *ops) { return ERR_PTR(-EOPNOTSUPP); } static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { return -EOPNOTSUPP; } static inline void 
ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } static inline struct ib_umem *ib_umem_get_peer(struct ib_device *device, unsigned long addr, size_t size, int access, unsigned long peer_mem_flags) { return ERR_PTR(-EINVAL); } static inline void ib_umem_activate_invalidation_notifier( struct ib_umem *umem, umem_invalidate_func_t func, void *cookie) { } static inline void ib_umem_stop_invalidation_notifier(struct ib_umem *umem) { } #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ PK ! a�� rdma/iba.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ #ifndef _IBA_DEFS_H_ #define _IBA_DEFS_H_ #include <linux/kernel.h> #include <linux/bitfield.h> #include <asm/unaligned.h> static inline u32 _iba_get8(const u8 *ptr) { return *ptr; } static inline void _iba_set8(u8 *ptr, u32 mask, u32 prep_value) { *ptr = (*ptr & ~mask) | prep_value; } static inline u16 _iba_get16(const __be16 *ptr) { return be16_to_cpu(*ptr); } static inline void _iba_set16(__be16 *ptr, u16 mask, u16 prep_value) { *ptr = cpu_to_be16((be16_to_cpu(*ptr) & ~mask) | prep_value); } static inline u32 _iba_get32(const __be32 *ptr) { return be32_to_cpu(*ptr); } static inline void _iba_set32(__be32 *ptr, u32 mask, u32 prep_value) { *ptr = cpu_to_be32((be32_to_cpu(*ptr) & ~mask) | prep_value); } static inline u64 _iba_get64(const __be64 *ptr) { /* * The mads are constructed so that 32 bit and smaller are naturally * aligned, everything larger has a max alignment of 4 bytes. 
*/ return be64_to_cpu(get_unaligned(ptr)); } static inline void _iba_set64(__be64 *ptr, u64 mask, u64 prep_value) { put_unaligned(cpu_to_be64((_iba_get64(ptr) & ~mask) | prep_value), ptr); } #define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \ ({ \ field_struct *_ptr = ptr; \ _iba_set##num_bits((void *)_ptr + (field_offset), field_mask, \ FIELD_PREP(field_mask, value)); \ }) #define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value) #define _IBA_GET_MEM_PTR(field_struct, field_offset, type, num_bits, ptr) \ ({ \ field_struct *_ptr = ptr; \ (type *)((void *)_ptr + (field_offset)); \ }) #define IBA_GET_MEM_PTR(field, ptr) _IBA_GET_MEM_PTR(field, ptr) /* FIXME: A set should always set the entire field, meaning we should zero the trailing bytes */ #define _IBA_SET_MEM(field_struct, field_offset, type, num_bits, ptr, in, \ bytes) \ ({ \ const type *_in_ptr = in; \ WARN_ON(bytes * 8 > num_bits); \ if (in && bytes) \ memcpy(_IBA_GET_MEM_PTR(field_struct, field_offset, \ type, num_bits, ptr), \ _in_ptr, bytes); \ }) #define IBA_SET_MEM(field, ptr, in, bytes) _IBA_SET_MEM(field, ptr, in, bytes) #define _IBA_GET(field_struct, field_offset, field_mask, num_bits, ptr) \ ({ \ const field_struct *_ptr = ptr; \ (u##num_bits) FIELD_GET( \ field_mask, _iba_get##num_bits((const void *)_ptr + \ (field_offset))); \ }) #define IBA_GET(field, ptr) _IBA_GET(field, ptr) #define _IBA_GET_MEM(field_struct, field_offset, type, num_bits, ptr, out, \ bytes) \ ({ \ type *_out_ptr = out; \ WARN_ON(bytes * 8 > num_bits); \ if (out && bytes) \ memcpy(_out_ptr, \ _IBA_GET_MEM_PTR(field_struct, field_offset, \ type, num_bits, ptr), \ bytes); \ }) #define IBA_GET_MEM(field, ptr, out, bytes) _IBA_GET_MEM(field, ptr, out, bytes) /* * The generated list becomes the parameters to the macros, the order is: * - struct this applies to * - starting offset of the max * - GENMASK or GENMASK_ULL in CPU order * - The width of data the mask operations should work on, in bits */ /* 
* Extraction using a tabular description like table 106. bit_offset is from
 * the Byte[Bit] notation.
 */
#define IBA_FIELD_BLOC(field_struct, byte_offset, bit_offset, num_bits)        \
	field_struct, byte_offset,                                             \
		GENMASK(7 - (bit_offset), 7 - (bit_offset) - (num_bits - 1)),  \
		8
/* 8-bit field starting at bit 0 of the byte at byte_offset. */
#define IBA_FIELD8_LOC(field_struct, byte_offset, num_bits)                    \
	IBA_FIELD_BLOC(field_struct, byte_offset, 0, num_bits)

/* byte_offset is rounded down to a 2-byte boundary; the mask is shifted to match. */
#define IBA_FIELD16_LOC(field_struct, byte_offset, num_bits)                   \
	field_struct, (byte_offset)&0xFFFE,                                    \
		GENMASK(15 - (((byte_offset) % 2) * 8),                        \
			15 - (((byte_offset) % 2) * 8) - (num_bits - 1)),      \
		16

/* byte_offset is rounded down to a 4-byte boundary; the mask is shifted to match. */
#define IBA_FIELD32_LOC(field_struct, byte_offset, num_bits)                   \
	field_struct, (byte_offset)&0xFFFC,                                    \
		GENMASK(31 - (((byte_offset) % 4) * 8),                        \
			31 - (((byte_offset) % 4) * 8) - (num_bits - 1)),      \
		32

/* Full-width 64-bit field; the mask always covers all 64 bits. */
#define IBA_FIELD64_LOC(field_struct, byte_offset)                             \
	field_struct, byte_offset, GENMASK_ULL(63, 0), 64
/*
 * In IBTA spec, everything that is more than 64bits is multiple
 * of bytes without leftover bits.
 */
#define IBA_FIELD_MLOC(field_struct, byte_offset, num_bits, type)              \
	field_struct, byte_offset, type, num_bits

#endif /* _IBA_DEFS_H_ */
PK ! �3�ҮC �C rdma/ib_cm.hnu �[���
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
*/
#ifndef IB_CM_H
#define IB_CM_H

#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
#include <rdma/rdma_cm.h>

/* Values of ib_cm_id.state. */
enum ib_cm_state {
	IB_CM_IDLE,
	IB_CM_LISTEN,
	IB_CM_REQ_SENT,
	IB_CM_REQ_RCVD,
	IB_CM_MRA_REQ_SENT,
	IB_CM_MRA_REQ_RCVD,
	IB_CM_REP_SENT,
	IB_CM_REP_RCVD,
	IB_CM_MRA_REP_SENT,
	IB_CM_MRA_REP_RCVD,
	IB_CM_ESTABLISHED,
	IB_CM_DREQ_SENT,
	IB_CM_DREQ_RCVD,
	IB_CM_TIMEWAIT,
	IB_CM_SIDR_REQ_SENT,
	IB_CM_SIDR_REQ_RCVD
};

/* Values of ib_cm_id.lap_state. */
enum ib_cm_lap_state {
	IB_CM_LAP_UNINIT,
	IB_CM_LAP_IDLE,
	IB_CM_LAP_SENT,
	IB_CM_LAP_RCVD,
	IB_CM_MRA_LAP_SENT,
	IB_CM_MRA_LAP_RCVD,
};

/* Values of ib_cm_event.event. */
enum ib_cm_event_type {
	IB_CM_REQ_ERROR,
	IB_CM_REQ_RECEIVED,
	IB_CM_REP_ERROR,
	IB_CM_REP_RECEIVED,
	IB_CM_RTU_RECEIVED,
	IB_CM_USER_ESTABLISHED,
	IB_CM_DREQ_ERROR,
	IB_CM_DREQ_RECEIVED,
	IB_CM_DREP_RECEIVED,
	IB_CM_TIMEWAIT_EXIT,
	IB_CM_MRA_RECEIVED,
	IB_CM_REJ_RECEIVED,
	IB_CM_LAP_ERROR,
	IB_CM_LAP_RECEIVED,
	IB_CM_APR_RECEIVED,
	IB_CM_SIDR_REQ_ERROR,
	IB_CM_SIDR_REQ_RECEIVED,
	IB_CM_SIDR_REP_RECEIVED
};

/* Per-message private data and additional-info sizes. */
enum ib_cm_data_size {
	IB_CM_REQ_PRIVATE_DATA_SIZE	 = 92,
	IB_CM_MRA_PRIVATE_DATA_SIZE	 = 222,
	IB_CM_REJ_PRIVATE_DATA_SIZE	 = 148,
	IB_CM_REP_PRIVATE_DATA_SIZE	 = 196,
	IB_CM_RTU_PRIVATE_DATA_SIZE	 = 224,
	IB_CM_DREQ_PRIVATE_DATA_SIZE	 = 220,
	IB_CM_DREP_PRIVATE_DATA_SIZE	 = 224,
	IB_CM_REJ_ARI_LENGTH		 = 72,
	IB_CM_LAP_PRIVATE_DATA_SIZE	 = 168,
	IB_CM_APR_PRIVATE_DATA_SIZE	 = 148,
	IB_CM_APR_INFO_LENGTH		 = 72,
	IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
	IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
	IB_CM_SIDR_REP_INFO_LENGTH	 = 72,
};

struct ib_cm_id;

/* Event payload selected by ib_cm_event.param.req_rcvd. */
struct ib_cm_req_event_param {
	struct ib_cm_id		*listen_id;

	/* P_Key that was used by the GMP's BTH header */
	u16			bth_pkey;

	u8			port;

	struct sa_path_rec	*primary_path;
	struct sa_path_rec	*alternate_path;

	/*
	 * SGID attribute of the primary path. Currently only
	 * useful for RoCE. Alternate path GID attributes
	 * are not yet supported.
	 */
	const struct ib_gid_attr *ppath_sgid_attr;

	__be64			remote_ca_guid;
	u32			remote_qkey;
	u32			remote_qpn;
	enum ib_qp_type		qp_type;

	u32			starting_psn;
	u8			responder_resources;
	u8			initiator_depth;
	unsigned int		local_cm_response_timeout:5;
	unsigned int		flow_control:1;
	unsigned int		remote_cm_response_timeout:5;
	unsigned int		retry_count:3;
	unsigned int		rnr_retry_count:3;
	unsigned int		srq:1;
	struct rdma_ucm_ece	ece;
};

/* Event payload selected by ib_cm_event.param.rep_rcvd. */
struct ib_cm_rep_event_param {
	__be64			remote_ca_guid;
	u32			remote_qkey;
	u32			remote_qpn;
	u32			starting_psn;
	u8			responder_resources;
	u8			initiator_depth;
	unsigned int		target_ack_delay:5;
	unsigned int		failover_accepted:2;
	unsigned int		flow_control:1;
	unsigned int		rnr_retry_count:3;
	unsigned int		srq:1;
	struct rdma_ucm_ece	ece;
};

/* Rejection reason codes; the numbering skips 34. */
enum ib_cm_rej_reason {
	IB_CM_REJ_NO_QP				= 1,
	IB_CM_REJ_NO_EEC			= 2,
	IB_CM_REJ_NO_RESOURCES			= 3,
	IB_CM_REJ_TIMEOUT			= 4,
	IB_CM_REJ_UNSUPPORTED			= 5,
	IB_CM_REJ_INVALID_COMM_ID		= 6,
	IB_CM_REJ_INVALID_COMM_INSTANCE		= 7,
	IB_CM_REJ_INVALID_SERVICE_ID		= 8,
	IB_CM_REJ_INVALID_TRANSPORT_TYPE	= 9,
	IB_CM_REJ_STALE_CONN			= 10,
	IB_CM_REJ_RDC_NOT_EXIST			= 11,
	IB_CM_REJ_INVALID_GID			= 12,
	IB_CM_REJ_INVALID_LID			= 13,
	IB_CM_REJ_INVALID_SL			= 14,
	IB_CM_REJ_INVALID_TRAFFIC_CLASS		= 15,
	IB_CM_REJ_INVALID_HOP_LIMIT		= 16,
	IB_CM_REJ_INVALID_PACKET_RATE		= 17,
	IB_CM_REJ_INVALID_ALT_GID		= 18,
	IB_CM_REJ_INVALID_ALT_LID		= 19,
	IB_CM_REJ_INVALID_ALT_SL		= 20,
	IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS	= 21,
	IB_CM_REJ_INVALID_ALT_HOP_LIMIT		= 22,
	IB_CM_REJ_INVALID_ALT_PACKET_RATE	= 23,
	IB_CM_REJ_PORT_CM_REDIRECT		= 24,
	IB_CM_REJ_PORT_REDIRECT			= 25,
	IB_CM_REJ_INVALID_MTU			= 26,
	IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES	= 27,
	IB_CM_REJ_CONSUMER_DEFINED		= 28,
	IB_CM_REJ_INVALID_RNR_RETRY		= 29,
	IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID	= 30,
	IB_CM_REJ_INVALID_CLASS_VERSION		= 31,
	IB_CM_REJ_INVALID_FLOW_LABEL		= 32,
	IB_CM_REJ_INVALID_ALT_FLOW_LABEL	= 33,
	IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED	= 35,
};

/* Event payload selected by ib_cm_event.param.rej_rcvd. */
struct ib_cm_rej_event_param {
	enum ib_cm_rej_reason	reason;
	void			*ari;
	u8			ari_length;
};

/* Event payload selected by ib_cm_event.param.mra_rcvd. */
struct ib_cm_mra_event_param {
	u8	service_timeout;
};

/* Event payload selected by ib_cm_event.param.lap_rcvd. */
struct ib_cm_lap_event_param {
	struct sa_path_rec	*alternate_path;
};

enum ib_cm_apr_status {
	IB_CM_APR_SUCCESS,
	IB_CM_APR_INVALID_COMM_ID,
	IB_CM_APR_UNSUPPORTED,
	IB_CM_APR_REJECT,
	IB_CM_APR_REDIRECT,
	IB_CM_APR_IS_CURRENT,
	IB_CM_APR_INVALID_QPN_EECN,
	IB_CM_APR_INVALID_LID,
	IB_CM_APR_INVALID_GID,
	IB_CM_APR_INVALID_FLOW_LABEL,
	IB_CM_APR_INVALID_TCLASS,
	IB_CM_APR_INVALID_HOP_LIMIT,
	IB_CM_APR_INVALID_PACKET_RATE,
	IB_CM_APR_INVALID_SL
};

/* Event payload selected by ib_cm_event.param.apr_rcvd. */
struct ib_cm_apr_event_param {
	enum ib_cm_apr_status	ap_status;
	void			*apr_info;
	u8			info_len;
};

/* Event payload selected by ib_cm_event.param.sidr_req_rcvd. */
struct ib_cm_sidr_req_event_param {
	struct ib_cm_id		*listen_id;
	__be64			service_id;

	/*
	 * SGID attribute of the request. Currently only
	 * useful for RoCE.
	 */
	const struct ib_gid_attr *sgid_attr;
	/* P_Key that was used by the GMP's BTH header */
	u16			bth_pkey;
	u8			port;
	u16			pkey;
};

enum ib_cm_sidr_status {
	IB_SIDR_SUCCESS,
	IB_SIDR_UNSUPPORTED,
	IB_SIDR_REJECT,
	IB_SIDR_NO_QP,
	IB_SIDR_REDIRECT,
	IB_SIDR_UNSUPPORTED_VERSION
};

/* Event payload selected by ib_cm_event.param.sidr_rep_rcvd. */
struct ib_cm_sidr_rep_event_param {
	enum ib_cm_sidr_status	status;
	u32			qkey;
	u32			qpn;
	void			*info;
	const struct ib_gid_attr *sgid_attr;
	u8			info_len;
};

/*
 * A CM event as passed to ib_cm_handler: the event type, a union of the
 * per-event payloads above, and a private_data pointer.
 */
struct ib_cm_event {
	enum ib_cm_event_type	event;
	union {
		struct ib_cm_req_event_param	req_rcvd;
		struct ib_cm_rep_event_param	rep_rcvd;
		/* No data for RTU received events. */
		struct ib_cm_rej_event_param	rej_rcvd;
		struct ib_cm_mra_event_param	mra_rcvd;
		struct ib_cm_lap_event_param	lap_rcvd;
		struct ib_cm_apr_event_param	apr_rcvd;
		/* No data for DREQ/DREP received events. */
		struct ib_cm_sidr_req_event_param sidr_req_rcvd;
		struct ib_cm_sidr_rep_event_param sidr_rep_rcvd;
		enum ib_wc_status		send_status;
	} param;

	void			*private_data;
};

/* Attribute IDs of the CM messages, stored big-endian. */
#define CM_REQ_ATTR_ID		cpu_to_be16(0x0010)
#define CM_MRA_ATTR_ID		cpu_to_be16(0x0011)
#define CM_REJ_ATTR_ID		cpu_to_be16(0x0012)
#define CM_REP_ATTR_ID		cpu_to_be16(0x0013)
#define CM_RTU_ATTR_ID		cpu_to_be16(0x0014)
#define CM_DREQ_ATTR_ID		cpu_to_be16(0x0015)
#define CM_DREP_ATTR_ID		cpu_to_be16(0x0016)
#define CM_SIDR_REQ_ATTR_ID	cpu_to_be16(0x0017)
#define CM_SIDR_REP_ATTR_ID	cpu_to_be16(0x0018)
#define CM_LAP_ATTR_ID		cpu_to_be16(0x0019)
#define CM_APR_ATTR_ID		cpu_to_be16(0x001A)

/**
 * ib_cm_handler - User-defined callback to process communication events.
 * @cm_id: Communication identifier associated with the reported event.
 * @event: Information about the communication event.
 *
 * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events
 * generated as a result of listen requests result in the allocation of a
 * new @cm_id.  The new @cm_id is returned to the user through this callback.
 * Clients are responsible for destroying the new @cm_id.  For peer-to-peer
 * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
 * to a user's existing communication identifier.
 *
 * Users may not call ib_destroy_cm_id while in the context of this callback;
 * however, returning a non-zero value instructs the communication manager to
 * destroy the @cm_id after the callback completes.
 */
typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
			     const struct ib_cm_event *event);

struct ib_cm_id {
	ib_cm_handler		cm_handler;
	void			*context;
	struct ib_device	*device;
	__be64			service_id;
	__be64			service_mask;
	enum ib_cm_state	state;		/* internal CM/debug use */
	enum ib_cm_lap_state	lap_state;	/* internal CM/debug use */
	__be32			local_id;
	__be32			remote_id;
	u32			remote_cm_qpn;  /* 1 unless redirected */
};

/**
 * ib_create_cm_id - Allocate a communication identifier.
 * @device: Device associated with the cm_id.
All related communication will
 *   be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @context: User specified context associated with the communication
 *   identifier.
 *
 * Communication identifiers are used to track connection states, service
 * ID resolution requests, and listen requests.
 */
struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
				 ib_cm_handler cm_handler,
				 void *context);

/**
 * ib_destroy_cm_id - Destroy a connection identifier.
 * @cm_id: Connection identifier to destroy.
 *
 * This call blocks until the connection identifier is destroyed.
 */
void ib_destroy_cm_id(struct ib_cm_id *cm_id);

/* Service ID constants and masks, stored big-endian. */
#define IB_SERVICE_ID_AGN_MASK	cpu_to_be64(0xFF00000000000000ULL)
#define IB_CM_ASSIGN_SERVICE_ID	cpu_to_be64(0x0200000000000000ULL)
#define IB_CMA_SERVICE_ID	cpu_to_be64(0x0000000001000000ULL)
#define IB_CMA_SERVICE_ID_MASK	cpu_to_be64(0xFFFFFFFFFF000000ULL)
#define IB_SDP_SERVICE_ID	cpu_to_be64(0x0000000000010000ULL)
#define IB_SDP_SERVICE_ID_MASK	cpu_to_be64(0xFFFFFFFFFFFF0000ULL)

/**
 * ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
*/
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
		 __be64 service_mask);

struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
				     ib_cm_handler cm_handler,
				     __be64 service_id);

/* Parameters consumed by ib_send_cm_req(). */
struct ib_cm_req_param {
	struct sa_path_rec	*primary_path;
	struct sa_path_rec	*alternate_path;
	const struct ib_gid_attr *ppath_sgid_attr;
	__be64			service_id;
	u32			qp_num;
	enum ib_qp_type		qp_type;
	u32			starting_psn;
	const void		*private_data;
	u8			private_data_len;
	u8			responder_resources;
	u8			initiator_depth;
	u8			remote_cm_response_timeout;
	u8			flow_control;
	u8			local_cm_response_timeout;
	u8			retry_count;
	u8			rnr_retry_count;
	u8			max_cm_retries;
	u8			srq;
	struct rdma_ucm_ece	ece;
};

/**
 * ib_send_cm_req - Sends a connection request to the remote node.
 * @cm_id: Connection identifier that will be associated with the
 *   connection request.
 * @param: Connection request information needed to establish the
 *   connection.
 */
int ib_send_cm_req(struct ib_cm_id *cm_id,
		   struct ib_cm_req_param *param);

/* Parameters consumed by ib_send_cm_rep(). */
struct ib_cm_rep_param {
	u32		qp_num;
	u32		starting_psn;
	const void	*private_data;
	u8		private_data_len;
	u8		responder_resources;
	u8		initiator_depth;
	u8		failover_accepted;
	u8		flow_control;
	u8		rnr_retry_count;
	u8		srq;
	struct rdma_ucm_ece ece;
};

/**
 * ib_send_cm_rep - Sends a connection reply in response to a connection
 *   request.
 * @cm_id: Connection identifier that will be associated with the
 *   connection request.
 * @param: Connection reply information needed to establish the
 *   connection.
 */
int ib_send_cm_rep(struct ib_cm_id *cm_id,
		   struct ib_cm_rep_param *param);

/**
 * ib_send_cm_rtu - Sends a connection ready to use message in response
 *   to a connection reply message.
 * @cm_id: Connection identifier associated with the connection request.
 * @private_data: Optional user-defined private data sent with the
 *   ready to use message.
 * @private_data_len: Size of the private data buffer, in bytes.
*/
int ib_send_cm_rtu(struct ib_cm_id *cm_id,
		   const void *private_data,
		   u8 private_data_len);

/**
 * ib_send_cm_dreq - Sends a disconnection request for an existing
 *   connection.
 * @cm_id: Connection identifier associated with the connection being
 *   released.
 * @private_data: Optional user-defined private data sent with the
 *   disconnection request message.
 * @private_data_len: Size of the private data buffer, in bytes.
 */
int ib_send_cm_dreq(struct ib_cm_id *cm_id,
		    const void *private_data,
		    u8 private_data_len);

/**
 * ib_send_cm_drep - Sends a disconnection reply to a disconnection request.
 * @cm_id: Connection identifier associated with the connection being
 *   released.
 * @private_data: Optional user-defined private data sent with the
 *   disconnection reply message.
 * @private_data_len: Size of the private data buffer, in bytes.
 *
 * If the cm_id is in the correct state, the CM will transition the connection
 * to the timewait state, even if an error occurs sending the DREP message.
 */
int ib_send_cm_drep(struct ib_cm_id *cm_id,
		    const void *private_data,
		    u8 private_data_len);

/**
 * ib_cm_notify - Notifies the CM of an event reported to the consumer.
 * @cm_id: Connection identifier to transition to established.
 * @event: Type of event.
 *
 * This routine should be invoked by users to notify the CM of relevant
 * communication events.  Events that should be reported to the CM and
 * when to report them are:
 *
 * IB_EVENT_COMM_EST - Used when a message is received on a connected
 *    QP before an RTU has been received.
 * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over
 *   to the alternate path.
 */
int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event);

/**
 * ib_send_cm_rej - Sends a connection rejection message to the
 *   remote node.
 * @cm_id: Connection identifier associated with the connection being
 *   rejected.
 * @reason: Reason for the connection request rejection.
 * @ari: Optional additional rejection information.
* @ari_length: Size of the additional rejection information, in bytes.
 * @private_data: Optional user-defined private data sent with the
 *   rejection message.
 * @private_data_len: Size of the private data buffer, in bytes.
 */
int ib_send_cm_rej(struct ib_cm_id *cm_id,
		   enum ib_cm_rej_reason reason,
		   void *ari,
		   u8 ari_length,
		   const void *private_data,
		   u8 private_data_len);

#define IB_CM_MRA_FLAG_DELAY 0x80  /* Send MRA only after a duplicate msg */

/**
 * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
 *   message.
 * @cm_id: Connection identifier associated with the connection message.
 * @service_timeout: The lower 5-bits specify the maximum time required for
 *   the sender to reply to the connection message.  The upper 3-bits
 *   specify additional control flags.
 * @private_data: Optional user-defined private data sent with the
 *   message receipt acknowledgement.
 * @private_data_len: Size of the private data buffer, in bytes.
 */
int ib_send_cm_mra(struct ib_cm_id *cm_id,
		   u8 service_timeout,
		   const void *private_data,
		   u8 private_data_len);

/**
 * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
 *   to a specified QP state.
 * @cm_id: Communication identifier associated with the QP attributes to
 *   initialize.
 * @qp_attr: On input, specifies the desired QP state.  On output, the
 *   mandatory and desired optional attributes will be set in order to
 *   modify the QP to the specified state.
 * @qp_attr_mask: The QP attribute mask that may be used to transition the
 *   QP to the specified state.
 *
 * Users must set the @qp_attr->qp_state to the desired QP state.  This call
 * will set all required attributes for the given transition, along with
 * known optional attributes.  Users may override the attributes returned from
 * this call before calling ib_modify_qp.
*/
int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
		       struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask);

/* Parameters consumed by ib_send_cm_sidr_req(). */
struct ib_cm_sidr_req_param {
	struct sa_path_rec	*path;
	const struct ib_gid_attr *sgid_attr;
	__be64			service_id;
	unsigned long		timeout_ms;
	const void		*private_data;
	u8			private_data_len;
	u8			max_cm_retries;
};

/**
 * ib_send_cm_sidr_req - Sends a service ID resolution request to the
 *   remote node.
 * @cm_id: Communication identifier that will be associated with the
 *   service ID resolution request.
 * @param: Service ID resolution request information.
 */
int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
			struct ib_cm_sidr_req_param *param);

/* Parameters consumed by ib_send_cm_sidr_rep(). */
struct ib_cm_sidr_rep_param {
	u32			qp_num;
	u32			qkey;
	enum ib_cm_sidr_status	status;
	const void		*info;
	u8			info_length;
	const void		*private_data;
	u8			private_data_len;
	struct rdma_ucm_ece	ece;
};

/**
 * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the
 *   remote node.
 * @cm_id: Communication identifier associated with the received service ID
 *   resolution request.
 * @param: Service ID resolution reply information.
 */
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
			struct ib_cm_sidr_rep_param *param);

/**
 * ibcm_reject_msg - return a pointer to a reject message string.
 * @reason: Value returned in the REJECT event status field.
 */
const char *__attribute_const__ ibcm_reject_msg(int reason);

#endif /* IB_CM_H */
PK ! �v� � rdma/signature.hnu �[���
/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
/*
 * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
 */

#ifndef _RDMA_SIGNATURE_H_
#define _RDMA_SIGNATURE_H_

#include <linux/types.h>

/* Bit flags, one per T10-DIF protection type. */
enum ib_signature_prot_cap {
	IB_PROT_T10DIF_TYPE_1 = 1,
	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
	IB_PROT_T10DIF_TYPE_3 = 1 << 2,
};

/* Bit flags, one per T10-DIF guard type. */
enum ib_signature_guard_cap {
	IB_GUARD_T10DIF_CRC	= 1,
	IB_GUARD_T10DIF_CSUM	= 1 << 1,
};

/**
 * enum ib_signature_type - Signature types
 * @IB_SIG_TYPE_NONE: Unprotected.
* @IB_SIG_TYPE_T10_DIF: Type T10-DIF
 */
enum ib_signature_type {
	IB_SIG_TYPE_NONE,
	IB_SIG_TYPE_T10_DIF,
};

/**
 * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
 * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
 * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
 */
enum ib_t10_dif_bg_type {
	IB_T10DIF_CRC,
	IB_T10DIF_CSUM,
};

/**
 * struct ib_t10_dif_domain - Parameters specific for T10-DIF
 *     domain.
 * @bg_type: T10-DIF block guard type (CRC|CSUM)
 * @pi_interval: protection information interval.
 * @bg: seed of guard computation.
 * @app_tag: application tag of guard block
 * @ref_tag: initial guard block reference tag.
 * @ref_remap: Indicate whether the reftag increments each block
 * @app_escape: Indicate to skip block check if apptag=0xffff
 * @ref_escape: Indicate to skip block check if reftag=0xffffffff
 * @apptag_check_mask: check bitmask of application tag.
 */
struct ib_t10_dif_domain {
	enum ib_t10_dif_bg_type bg_type;
	u16			pi_interval;
	u16			bg;
	u16			app_tag;
	u32			ref_tag;
	bool			ref_remap;
	bool			app_escape;
	bool			ref_escape;
	u16			apptag_check_mask;
};

/**
 * struct ib_sig_domain - Parameters for signature domain
 * @sig_type: specific signature type
 * @sig: union of all signature domain attributes that may
 *     be used to set domain layout.
 */
struct ib_sig_domain {
	enum ib_signature_type sig_type;
	union {
		struct ib_t10_dif_domain dif;
	} sig;
};

/**
 * struct ib_sig_attrs - Parameters for signature handover operation
 * @check_mask: bitmask for signature byte check (8 bytes)
 * @mem: memory domain layout descriptor.
 * @wire: wire domain layout descriptor.
* @meta_length: metadata length
 */
struct ib_sig_attrs {
	u8			check_mask;
	struct ib_sig_domain	mem;
	struct ib_sig_domain	wire;
	int			meta_length;
};

enum ib_sig_err_type {
	IB_SIG_BAD_GUARD,
	IB_SIG_BAD_REFTAG,
	IB_SIG_BAD_APPTAG,
};

/*
 * Signature check masks (8 bytes in total) according to the T10-PI standard:
 *  -------- -------- ------------
 * | GUARD  | APPTAG |   REFTAG   |
 * |  2B    |  2B    |    4B      |
 *  -------- -------- ------------
 */
enum {
	IB_SIG_CHECK_GUARD = 0xc0,
	IB_SIG_CHECK_APPTAG = 0x30,
	IB_SIG_CHECK_REFTAG = 0x0f,
};

/*
 * struct ib_sig_err - signature error descriptor
 */
struct ib_sig_err {
	enum ib_sig_err_type	err_type;
	u32			expected;
	u32			actual;
	u64			sig_err_offset;
	u32			key;
};

#endif /* _RDMA_SIGNATURE_H_ */
PK ! �(}&� � rdma/restrack.hnu �[���
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
 * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
 */

#ifndef _RDMA_RESTRACK_H_
#define _RDMA_RESTRACK_H_

#include <linux/typecheck.h>
#include <linux/sched.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <uapi/rdma/rdma_netlink.h>
#include <linux/xarray.h>

struct ib_device;
struct sk_buff;

/**
 * enum rdma_restrack_type - HW objects to track
 */
enum rdma_restrack_type {
	/**
	 * @RDMA_RESTRACK_PD: Protection domain (PD)
	 */
	RDMA_RESTRACK_PD,
	/**
	 * @RDMA_RESTRACK_CQ: Completion queue (CQ)
	 */
	RDMA_RESTRACK_CQ,
	/**
	 * @RDMA_RESTRACK_QP: Queue pair (QP)
	 */
	RDMA_RESTRACK_QP,
	/**
	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
	 */
	RDMA_RESTRACK_CM_ID,
	/**
	 * @RDMA_RESTRACK_MR: Memory Region (MR)
	 */
	RDMA_RESTRACK_MR,
	/**
	 * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
	 */
	RDMA_RESTRACK_CTX,
	/**
	 * @RDMA_RESTRACK_COUNTER: Statistic Counter
	 */
	RDMA_RESTRACK_COUNTER,
	/**
	 * @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ)
	 */
	RDMA_RESTRACK_SRQ,
	/**
	 * @RDMA_RESTRACK_MAX: Last entry, used for array declarations
	 */
	RDMA_RESTRACK_MAX
};

/**
 * struct rdma_restrack_entry - metadata per-entry
 */
struct rdma_restrack_entry {
	/**
	 * @valid: validity indicator
	 *
	 * The entries are filled during rdma_restrack_add,
	 * can be attempted to be free during rdma_restrack_del.
	 *
	 * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
	 */
	bool			valid;
	/**
	 * @no_track: don't add this entry to restrack DB
	 *
	 * This field is used to mark an entry that doesn't need to be added to
	 * internal restrack DB and presented later to the users at the nldev
	 * query stage.
	 */
	u8			no_track : 1;
	/**
	 * @kref: Protect destroy of the resource
	 */
	struct kref		kref;
	/**
	 * @comp: Signal that all consumers of resource are completed their work
	 */
	struct completion	comp;
	/**
	 * @task: owner of resource tracking entity
	 *
	 * There are two types of entities: created by user and created
	 * by kernel.
	 *
	 * This is relevant for the entities created by users.
	 * For the entities created by kernel, this pointer will be NULL.
	 */
	struct task_struct	*task;
	/**
	 * @kern_name: name of owner for the kernel created entities.
	 */
	const char		*kern_name;
	/**
	 * @type: various objects in restrack database
	 */
	enum rdma_restrack_type	type;
	/**
	 * @user: user resource
	 */
	bool			user;
	/**
	 * @id: ID to expose to users
	 */
	u32 id;
};

int rdma_restrack_count(struct ib_device *dev,
			enum rdma_restrack_type type);
/**
 * rdma_is_kernel_res() - check the owner of resource
 * @res:  resource entry
 *
 * Returns true when the entry's @user flag is not set.
 */
static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
{
	return !res->user;
}

/**
 * rdma_restrack_get() - grab to protect resource from release
 * @res:  resource entry
 */
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);

/**
 * rdma_restrack_put() - release resource
 * @res:  resource entry
 */
int rdma_restrack_put(struct rdma_restrack_entry *res);

/*
 * Helper functions for rdma drivers when filling out
 * nldev driver attributes.
*/ int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value); int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, u32 value); int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, const char *str); int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, u64 value); struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); /** * rdma_restrack_no_track() - don't add resource to the DB * @res: resource entry * * Every user of thie API should be cross examined. * Probaby you don't need to use this function. */ static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) { res->no_track = true; } static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) { return !res->no_track; } #endif /* _RDMA_RESTRACK_H_ */ PK ! ��Q��5 �5 rdma/opa_port_info.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014-2020 Intel Corporation. All rights reserved. 
*/ #ifndef OPA_PORT_INFO_H #define OPA_PORT_INFO_H #include <rdma/opa_smi.h> #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ #define OPA_PORT_PACKET_FORMAT_NOP 0 /* No change */ #define OPA_PORT_PACKET_FORMAT_8B 1 /* Format 8B */ #define OPA_PORT_PACKET_FORMAT_9B 2 /* Format 9B */ #define OPA_PORT_PACKET_FORMAT_10B 4 /* Format 10B */ #define OPA_PORT_PACKET_FORMAT_16B 8 /* Format 16B */ #define OPA_PORT_LTP_CRC_MODE_NONE 0 /* No change */ #define OPA_PORT_LTP_CRC_MODE_14 1 /* 14-bit LTP CRC mode (optional) */ #define OPA_PORT_LTP_CRC_MODE_16 2 /* 16-bit LTP CRC mode */ #define OPA_PORT_LTP_CRC_MODE_48 4 /* 48-bit LTP CRC mode (optional) */ #define OPA_PORT_LTP_CRC_MODE_PER_LANE 8 /* 12/16-bit per lane LTP CRC mode */ /* Link Down / Neighbor Link Down Reason; indicated as follows: */ #define OPA_LINKDOWN_REASON_NONE 0 /* No specified reason */ #define OPA_LINKDOWN_REASON_RCV_ERROR_0 1 #define OPA_LINKDOWN_REASON_BAD_PKT_LEN 2 #define OPA_LINKDOWN_REASON_PKT_TOO_LONG 3 #define OPA_LINKDOWN_REASON_PKT_TOO_SHORT 4 #define OPA_LINKDOWN_REASON_BAD_SLID 5 #define OPA_LINKDOWN_REASON_BAD_DLID 6 #define OPA_LINKDOWN_REASON_BAD_L2 7 #define OPA_LINKDOWN_REASON_BAD_SC 8 #define OPA_LINKDOWN_REASON_RCV_ERROR_8 9 #define OPA_LINKDOWN_REASON_BAD_MID_TAIL 10 #define OPA_LINKDOWN_REASON_RCV_ERROR_10 11 #define OPA_LINKDOWN_REASON_PREEMPT_ERROR 12 #define OPA_LINKDOWN_REASON_PREEMPT_VL15 13 #define OPA_LINKDOWN_REASON_BAD_VL_MARKER 14 #define OPA_LINKDOWN_REASON_RCV_ERROR_14 15 #define OPA_LINKDOWN_REASON_RCV_ERROR_15 16 #define OPA_LINKDOWN_REASON_BAD_HEAD_DIST 17 #define OPA_LINKDOWN_REASON_BAD_TAIL_DIST 18 #define OPA_LINKDOWN_REASON_BAD_CTRL_DIST 19 #define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK 20 #define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER 21 #define OPA_LINKDOWN_REASON_BAD_PREEMPT 22 #define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT 23 #define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT 24 #define 
OPA_LINKDOWN_REASON_RCV_ERROR_24 25 #define OPA_LINKDOWN_REASON_RCV_ERROR_25 26 #define OPA_LINKDOWN_REASON_RCV_ERROR_26 27 #define OPA_LINKDOWN_REASON_RCV_ERROR_27 28 #define OPA_LINKDOWN_REASON_RCV_ERROR_28 29 #define OPA_LINKDOWN_REASON_RCV_ERROR_29 30 #define OPA_LINKDOWN_REASON_RCV_ERROR_30 31 #define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN 32 #define OPA_LINKDOWN_REASON_UNKNOWN 33 /* 34 -reserved */ #define OPA_LINKDOWN_REASON_REBOOT 35 #define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN 36 /* 37-38 reserved */ #define OPA_LINKDOWN_REASON_FM_BOUNCE 39 #define OPA_LINKDOWN_REASON_SPEED_POLICY 40 #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 #define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ #define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED 54 /* 55 reserved */ #define OPA_LINKDOWN_REASON_POWER_POLICY 56 #define OPA_LINKDOWN_REASON_LINKSPEED_POLICY 57 #define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY 58 /* 59 reserved */ #define OPA_LINKDOWN_REASON_SWITCH_MGMT 60 #define OPA_LINKDOWN_REASON_SMA_DISABLED 61 /* 62 reserved */ #define OPA_LINKDOWN_REASON_TRANSIENT 63 /* 64-255 reserved */ /* OPA Link Init reason; indicated as follows: */ /* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */ #define OPA_LINKINIT_REASON_NOP 0 #define OPA_LINKINIT_REASON_LINKUP (1 << 4) #define OPA_LINKINIT_REASON_FLAPPING (2 << 4) #define OPA_LINKINIT_REASON_CLEAR (8 << 4) #define OPA_LINKINIT_OUTSIDE_POLICY (8 << 4) #define OPA_LINKINIT_QUARANTINED (9 << 4) #define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4) #define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */ #define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */ #define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? 
Gbps (EDR) */ #define OPA_LINK_WIDTH_1X 0x0001 #define OPA_LINK_WIDTH_2X 0x0002 #define OPA_LINK_WIDTH_3X 0x0004 #define OPA_LINK_WIDTH_4X 0x0008 #define OPA_CAP_MASK3_IsEthOnFabricSupported (1 << 13) #define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) #define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) #define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) #define OPA_CAP_MASK3_IsPassThroughSupported (1 << 4) #define OPA_CAP_MASK3_IsSharedSpaceSupported (1 << 3) /* reserved (1 << 2) */ #define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) #define OPA_CAP_MASK3_IsVLrSupported (1 << 0) enum { OPA_PORT_PHYS_CONF_DISCONNECTED = 0, OPA_PORT_PHYS_CONF_STANDARD = 1, OPA_PORT_PHYS_CONF_FIXED = 2, OPA_PORT_PHYS_CONF_VARIABLE = 3, OPA_PORT_PHYS_CONF_SI_PHOTO = 4 }; enum port_info_field_masks { /* vl.cap */ OPA_PI_MASK_VL_CAP = 0x1F, /* port_states.ledenable_offlinereason */ OPA_PI_MASK_OFFLINE_REASON = 0x0F, OPA_PI_MASK_LED_ENABLE = 0x40, /* port_states.unsleepstate_downdefstate */ OPA_PI_MASK_UNSLEEP_STATE = 0xF0, OPA_PI_MASK_DOWNDEF_STATE = 0x0F, /* port_states.portphysstate_portstate */ OPA_PI_MASK_PORT_PHYSICAL_STATE = 0xF0, OPA_PI_MASK_PORT_STATE = 0x0F, /* port_phys_conf */ OPA_PI_MASK_PORT_PHYSICAL_CONF = 0x0F, /* collectivemask_multicastmask */ OPA_PI_MASK_COLLECT_MASK = 0x38, OPA_PI_MASK_MULTICAST_MASK = 0x07, /* mkeyprotect_lmc */ OPA_PI_MASK_MKEY_PROT_BIT = 0xC0, OPA_PI_MASK_LMC = 0x0F, /* smsl */ OPA_PI_MASK_SMSL = 0x1F, /* partenforce_filterraw */ /* Filter Raw In/Out bits 1 and 2 were removed */ OPA_PI_MASK_LINKINIT_REASON = 0xF0, OPA_PI_MASK_PARTITION_ENFORCE_IN = 0x08, OPA_PI_MASK_PARTITION_ENFORCE_OUT = 0x04, /* operational_vls */ OPA_PI_MASK_OPERATIONAL_VL = 0x1F, /* sa_qp */ OPA_PI_MASK_SA_QP = 0x00FFFFFF, /* sm_trap_qp */ OPA_PI_MASK_SM_TRAP_QP = 0x00FFFFFF, /* localphy_overrun_errors */ OPA_PI_MASK_LOCAL_PHY_ERRORS = 0xF0, OPA_PI_MASK_OVERRUN_ERRORS = 0x0F, /* clientrereg_subnettimeout */ OPA_PI_MASK_CLIENT_REREGISTER = 0x80, OPA_PI_MASK_SUBNET_TIMEOUT = 
0x1F, /* port_link_mode */ OPA_PI_MASK_PORT_LINK_SUPPORTED = (0x001F << 10), OPA_PI_MASK_PORT_LINK_ENABLED = (0x001F << 5), OPA_PI_MASK_PORT_LINK_ACTIVE = (0x001F << 0), /* port_link_crc_mode */ OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED = 0x0F00, OPA_PI_MASK_PORT_LINK_CRC_ENABLED = 0x00F0, OPA_PI_MASK_PORT_LINK_CRC_ACTIVE = 0x000F, /* port_mode */ OPA_PI_MASK_PORT_MODE_SECURITY_CHECK = 0x0001, OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY = 0x0002, OPA_PI_MASK_PORT_MODE_PKEY_CONVERT = 0x0004, OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING = 0x0008, OPA_PI_MASK_PORT_MODE_VL_MARKER = 0x0010, OPA_PI_MASK_PORT_PASS_THROUGH = 0x0020, OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE = 0x0040, /* flit_control.interleave */ OPA_PI_MASK_INTERLEAVE_DIST_SUP = (0x0003 << 12), OPA_PI_MASK_INTERLEAVE_DIST_ENABLE = (0x0003 << 10), OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX = (0x001F << 5), OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX = (0x001F << 0), /* port_error_action */ OPA_PI_MASK_EX_BUFFER_OVERRUN = 0x80000000, /* 7 bits reserved */ OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT = 0x00800000, OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT = 0x00400000, OPA_PI_MASK_FM_CFG_BAD_PREEMPT = 0x00200000, OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER = 0x00100000, OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK = 0x00080000, OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST = 0x00040000, OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST = 0x00020000, OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST = 0x00010000, /* 2 bits reserved */ OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER = 0x00002000, OPA_PI_MASK_PORT_RCV_PREEMPT_VL15 = 0x00001000, OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR = 0x00000800, /* 1 bit reserved */ OPA_PI_MASK_PORT_RCV_BAD_MidTail = 0x00000200, /* 1 bit reserved */ OPA_PI_MASK_PORT_RCV_BAD_SC = 0x00000080, OPA_PI_MASK_PORT_RCV_BAD_L2 = 0x00000040, OPA_PI_MASK_PORT_RCV_BAD_DLID = 0x00000020, OPA_PI_MASK_PORT_RCV_BAD_SLID = 0x00000010, OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT = 0x00000008, OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG = 0x00000004, OPA_PI_MASK_PORT_RCV_BAD_PKTLEN = 0x00000002, OPA_PI_MASK_PORT_RCV_BAD_LT = 0x00000001, /* 
pass_through.res_drctl */ OPA_PI_MASK_PASS_THROUGH_DR_CONTROL = 0x01, /* buffer_units */ OPA_PI_MASK_BUF_UNIT_VL15_INIT = (0x00000FFF << 11), OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE = (0x0000001F << 6), OPA_PI_MASK_BUF_UNIT_CREDIT_ACK = (0x00000003 << 3), OPA_PI_MASK_BUF_UNIT_BUF_ALLOC = (0x00000003 << 0), /* neigh_mtu.pvlx_to_mtu */ OPA_PI_MASK_NEIGH_MTU_PVL0 = 0xF0, OPA_PI_MASK_NEIGH_MTU_PVL1 = 0x0F, /* neigh_mtu.vlstall_hoq_life */ OPA_PI_MASK_VL_STALL = (0x03 << 5), OPA_PI_MASK_HOQ_LIFE = (0x1F << 0), /* port_neigh_mode */ OPA_PI_MASK_NEIGH_MGMT_ALLOWED = (0x01 << 3), OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS = (0x01 << 2), OPA_PI_MASK_NEIGH_NODE_TYPE = (0x03 << 0), /* resptime_value */ OPA_PI_MASK_RESPONSE_TIME_VALUE = 0x1F, /* mtucap */ OPA_PI_MASK_MTU_CAP = 0x0F, }; struct opa_port_states { u8 reserved; u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ u8 reserved2; u8 portphysstate_portstate; /* 4 bits, 4 bits */ }; struct opa_port_state_info { struct opa_port_states port_states; __be16 link_width_downgrade_tx_active; __be16 link_width_downgrade_rx_active; }; struct opa_port_info { __be32 lid; __be32 flow_control_mask; struct { u8 res; /* was inittype */ u8 cap; /* 3 res, 5 bits */ __be16 high_limit; __be16 preempt_limit; u8 arb_high_cap; u8 arb_low_cap; } vl; struct opa_port_states port_states; u8 port_phys_conf; /* 4 res, 4 bits */ u8 collectivemask_multicastmask; /* 2 res, 3, 3 */ u8 mkeyprotect_lmc; /* 2 bits, 2 res, 4 bits */ u8 smsl; /* 3 res, 5 bits */ u8 partenforce_filterraw; /* bit fields */ u8 operational_vls; /* 3 res, 5 bits */ __be16 pkey_8b; __be16 pkey_10b; __be16 mkey_violations; __be16 pkey_violations; __be16 qkey_violations; __be32 sm_trap_qp; /* 8 bits, 24 bits */ __be32 sa_qp; /* 8 bits, 24 bits */ u8 neigh_port_num; u8 link_down_reason; u8 neigh_link_down_reason; u8 clientrereg_subnettimeout; /* 1 bit, 2 bits, 5 */ struct { __be16 supported; __be16 enabled; __be16 active; } link_speed; struct { __be16 supported; __be16 enabled; __be16 
active; } link_width; struct { __be16 supported; __be16 enabled; __be16 tx_active; __be16 rx_active; } link_width_downgrade; __be16 port_link_mode; /* 1 res, 5 bits, 5 bits, 5 bits */ __be16 port_ltp_crc_mode; /* 4 res, 4 bits, 4 bits, 4 bits */ __be16 port_mode; /* 9 res, bit fields */ struct { __be16 supported; __be16 enabled; } port_packet_format; struct { __be16 interleave; /* 2 res, 2,2,5,5 */ struct { __be16 min_initial; __be16 min_tail; u8 large_pkt_limit; u8 small_pkt_limit; u8 max_small_pkt_limit; u8 preemption_limit; } preemption; } flit_control; __be32 reserved4; __be32 port_error_action; /* bit field */ struct { u8 egress_port; u8 res_drctl; /* 7 res, 1 */ } pass_through; __be16 mkey_lease_period; __be32 buffer_units; /* 9 res, 12, 5, 3, 3 */ __be32 reserved5; __be32 sm_lid; __be64 mkey; __be64 subnet_prefix; struct { u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */ } neigh_mtu; struct { u8 vlstall_hoqlife; /* 3 bits, 5 bits */ } xmit_q[OPA_MAX_VLS]; struct { u8 addr[16]; } ipaddr_ipv6; struct { u8 addr[4]; } ipaddr_ipv4; u32 reserved6; u32 reserved7; u32 reserved8; __be64 neigh_node_guid; __be32 ib_cap_mask; __be16 reserved9; /* was ib_cap_mask2 */ __be16 opa_cap_mask; __be32 reserved10; /* was link_roundtrip_latency */ __be16 overall_buffer_space; __be16 reserved11; /* was max_credit_hint */ __be16 diag_code; struct { u8 buffer; u8 wire; } replay_depth; u8 port_neigh_mode; u8 mtucap; /* 4 res, 4 bits */ u8 resptimevalue; /* 3 res, 5 bits */ u8 local_port_num; u8 reserved12; u8 reserved13; /* was guid_cap */ } __packed; #endif /* OPA_PORT_INFO_H */ PK ! E��?4 4 rdma/tid_rdma_defs.hnu �[��� /* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* * Copyright(c) 2018 Intel Corporation. 
* */ #ifndef TID_RDMA_DEFS_H #define TID_RDMA_DEFS_H #include <rdma/ib_pack.h> struct tid_rdma_read_req { __le32 kdeth0; __le32 kdeth1; struct ib_reth reth; __be32 tid_flow_psn; __be32 tid_flow_qp; __be32 verbs_qp; }; struct tid_rdma_read_resp { __le32 kdeth0; __le32 kdeth1; __be32 aeth; __be32 reserved[4]; __be32 verbs_psn; __be32 verbs_qp; }; struct tid_rdma_write_req { __le32 kdeth0; __le32 kdeth1; struct ib_reth reth; __be32 reserved[2]; __be32 verbs_qp; }; struct tid_rdma_write_resp { __le32 kdeth0; __le32 kdeth1; __be32 aeth; __be32 reserved[3]; __be32 tid_flow_psn; __be32 tid_flow_qp; __be32 verbs_qp; }; struct tid_rdma_write_data { __le32 kdeth0; __le32 kdeth1; __be32 reserved[6]; __be32 verbs_qp; }; struct tid_rdma_resync { __le32 kdeth0; __le32 kdeth1; __be32 reserved[6]; __be32 verbs_qp; }; struct tid_rdma_ack { __le32 kdeth0; __le32 kdeth1; __be32 aeth; __be32 reserved[2]; __be32 tid_flow_psn; __be32 verbs_psn; __be32 tid_flow_qp; __be32 verbs_qp; }; /* * TID RDMA Opcodes */ #define IB_OPCODE_TID_RDMA 0xe0 enum { IB_OPCODE_WRITE_REQ = 0x0, IB_OPCODE_WRITE_RESP = 0x1, IB_OPCODE_WRITE_DATA = 0x2, IB_OPCODE_WRITE_DATA_LAST = 0x3, IB_OPCODE_READ_REQ = 0x4, IB_OPCODE_READ_RESP = 0x5, IB_OPCODE_RESYNC = 0x6, IB_OPCODE_ACK = 0x7, IB_OPCODE(TID_RDMA, WRITE_REQ), IB_OPCODE(TID_RDMA, WRITE_RESP), IB_OPCODE(TID_RDMA, WRITE_DATA), IB_OPCODE(TID_RDMA, WRITE_DATA_LAST), IB_OPCODE(TID_RDMA, READ_REQ), IB_OPCODE(TID_RDMA, READ_RESP), IB_OPCODE(TID_RDMA, RESYNC), IB_OPCODE(TID_RDMA, ACK), }; #define TID_OP(x) IB_OPCODE_TID_RDMA_##x /* * Define TID RDMA specific WR opcodes. The ib_wr_opcode * enum already provides some reserved values for use by * low level drivers. Two of those are used but renamed * to be more descriptive. */ #define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1 #define IB_WR_TID_RDMA_READ IB_WR_RESERVED2 #endif /* TID_RDMA_DEFS_H */ PK ! 
�!�� � rdma/uverbs_named_ioctl.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. */ #ifndef _UVERBS_NAMED_IOCTL_ #define _UVERBS_NAMED_IOCTL_ #include <rdma/uverbs_ioctl.h> #ifndef UVERBS_MODULE_NAME #error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h" #endif #define _UVERBS_PASTE(x, y) x ## y #define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) #define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) #define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id) /* These are static so they do not need to be qualified */ #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id #define UVERBS_OBJECT_METHODS(object_id) _UVERBS_NAME(_object_methods_##object_id, __LINE__) #define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ _method_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ .id = _method_id, \ .handler = UVERBS_HANDLER(_method_id), \ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ } /* Create a standard destroy method using the default handler. The handle_attr * argument must be the attribute specifying the handle to destroy, the * default handler does not support any other attributes. */ #define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ _method_id)[] = { _handle_attr }; \ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ .id = _method_id, \ .handler = uverbs_destroy_def_handler, \ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ } #define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) 
\ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .type_attrs = &_type_attrs, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ } /* * Declare global methods. These still have a unique object_id because we * identify all uapi methods with a (object,method) tuple. However, they have * no type pointer. */ #define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ } /* Used by drivers to declare a complete parsing tree for new methods */ #define ADD_UVERBS_METHODS(_name, _object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_object_def _name = { \ .id = _object_id, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ }; /* Used by drivers to declare a complete parsing tree for a single method that * differs only in having additional driver specific attributes. */ #define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ _method_id)[] = { __VA_ARGS__ }; \ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \ .id = _method_id, \ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \ }; \ ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id)) #endif PK ! 
�kW4F) F) rdma/ib_verbs.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. */ #ifndef IB_VERBS_H #define IB_VERBS_H #include <linux/ethtool.h> #include <linux/types.h> #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/rwsem.h> #include <linux/workqueue.h> #include <linux/irq_poll.h> #include <uapi/linux/if_ether.h> #include <net/ipv6.h> #include <net/ip.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/refcount.h> #include <linux/if_link.h> #include <linux/atomic.h> #include <linux/mmu_notifier.h> #include <linux/uaccess.h> #include <linux/cgroup_rdma.h> #include <linux/irqflags.h> #include <linux/preempt.h> #include <linux/dim.h> #include <uapi/rdma/ib_user_verbs.h> #include <rdma/rdma_counter.h> #include <rdma/restrack.h> #include <rdma/signature.h> #include <uapi/rdma/rdma_user_ioctl.h> #include <uapi/rdma/ib_user_ioctl_verbs.h> #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN struct ib_umem_odp; struct ib_uqp_object; struct ib_usrq_object; struct ib_uwq_object; struct rdma_cm_id; struct ib_port; struct hw_stats_device_data; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; struct ib_ucq_object; __printf(3, 4) __cold void ibdev_printk(const char *level, const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_emerg(const struct ib_device *ibdev, const char 
*format, ...); __printf(2, 3) __cold void ibdev_alert(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_crit(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_err(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_warn(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_notice(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold void ibdev_info(const struct ib_device *ibdev, const char *format, ...); #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define ibdev_dbg(__dev, format, args...) \ dynamic_ibdev_dbg(__dev, format, ##args) #else __printf(2, 3) __cold static inline void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} #endif #define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ if (__ratelimit(&_rs)) \ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \ } while (0) #define ibdev_emerg_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__) #define ibdev_alert_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__) #define ibdev_crit_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__) #define ibdev_err_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__) #define ibdev_warn_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__) #define ibdev_notice_ratelimited(ibdev, fmt, ...) \ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__) #define ibdev_info_ratelimited(ibdev, fmt, ...) 
\ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__) #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define ibdev_dbg_ratelimited(ibdev, fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \ ##__VA_ARGS__); \ } while (0) #else __printf(2, 3) __cold static inline void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {} #endif union ib_gid { u8 raw[16]; struct { __be64 subnet_prefix; __be64 interface_id; } global; }; extern union ib_gid zgid; enum ib_gid_type { IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB, IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1, IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2, IB_GID_TYPE_SIZE }; #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { struct net_device __rcu *ndev; struct ib_device *device; union ib_gid gid; enum ib_gid_type gid_type; u16 index; u32 port_num; }; enum { /* set the local administered indication */ IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, }; enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, RDMA_TRANSPORT_USNIC, RDMA_TRANSPORT_USNIC_UDP, RDMA_TRANSPORT_UNSPECIFIED, }; enum rdma_protocol_type { RDMA_PROTOCOL_IB, RDMA_PROTOCOL_IBOE, RDMA_PROTOCOL_IWARP, RDMA_PROTOCOL_USNIC_UDP }; __attribute_const__ enum rdma_transport_type rdma_node_get_transport(unsigned int node_type); enum rdma_network_type { RDMA_NETWORK_IB, RDMA_NETWORK_ROCE_V1, RDMA_NETWORK_IPV4, RDMA_NETWORK_IPV6 }; static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type) { if (network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) return IB_GID_TYPE_ROCE_UDP_ENCAP; else if (network_type == 
RDMA_NETWORK_ROCE_V1) return IB_GID_TYPE_ROCE; else return IB_GID_TYPE_IB; } static inline enum rdma_network_type rdma_gid_attr_network_type(const struct ib_gid_attr *attr) { if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; if (attr->gid_type == IB_GID_TYPE_ROCE) return RDMA_NETWORK_ROCE_V1; if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else return RDMA_NETWORK_IPV6; } enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, IB_LINK_LAYER_ETHERNET, }; enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = (1 << 0), IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), IB_DEVICE_RAW_MULTI = (1 << 3), IB_DEVICE_AUTO_PATH_MIG = (1 << 4), IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), IB_DEVICE_SHUTDOWN_PORT = (1 << 8), /* Not in use, former INIT_TYPE = (1 << 9),*/ IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), IB_DEVICE_SRQ_RESIZE = (1 << 13), IB_DEVICE_N_NOTIFY_CQ = (1 << 14), /* * This device supports a per-device lkey or stag that can be * used without performing a memory registration for the local * memory. Note that ULPs should never check this flag, but * instead of use the local_dma_lkey flag in the ib_pd structure, * which will always contain a usable lkey. */ IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), /* Reserved, old SEND_W_INV = (1 << 16),*/ IB_DEVICE_MEM_WINDOW = (1 << 17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB * messages and can verify the validity of checksum for * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. 
*/ IB_DEVICE_UD_IP_CSUM = (1 << 18), IB_DEVICE_UD_TSO = (1 << 19), IB_DEVICE_XRC = (1 << 20), /* * This device supports the IB "base memory management extension", * which includes support for fast registrations (IB_WR_REG_MR, * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should * also be set by any iWarp device which must support FRs to comply * to the iWarp verbs spec. iWarp devices also support the * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), IB_DEVICE_RC_IP_CSUM = (1 << 25), /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */ IB_DEVICE_RAW_IP_CSUM = (1 << 26), /* * Devices should set IB_DEVICE_CROSS_CHANNEL if they * support execution of WQEs that involve synchronization * of I/O operations with single completion queue managed * by hardware. */ IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), /* The device supports padding incoming writes to cacheline. 
*/ IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), }; enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, IB_ATOMIC_GLOB }; enum ib_odp_general_cap_bits { IB_ODP_SUPPORT = 1 << 0, IB_ODP_SUPPORT_IMPLICIT = 1 << 1, }; enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_SEND = 1 << 0, IB_ODP_SUPPORT_RECV = 1 << 1, IB_ODP_SUPPORT_WRITE = 1 << 2, IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, }; struct ib_odp_caps { uint64_t general_caps; struct { uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; uint32_t xrc_odp_caps; } per_transport_caps; }; struct ib_rss_caps { /* Corresponding bit will be set if qp type from * 'enum ib_qp_type' is supported, e.g. * supported_qpts |= 1 << IB_QPT_UD */ u32 supported_qpts; u32 max_rwq_indirection_tables; u32 max_rwq_indirection_table_size; }; enum ib_tm_cap_flags { /* Support tag matching with rendezvous offload for RC transport */ IB_TM_CAP_RNDV_RC = 1 << 0, }; struct ib_tm_caps { /* Max size of RNDV header */ u32 max_rndv_hdr_size; /* Max number of entries in tag matching list */ u32 max_num_tags; /* From enum ib_tm_cap_flags */ u32 flags; /* Max number of outstanding list operations */ u32 max_ops; /* Max number of SGE in tag matching entry */ u32 max_sge; }; struct ib_cq_init_attr { unsigned int cqe; u32 comp_vector; u32 flags; }; enum ib_cq_attr_mask { IB_CQ_MODERATE = 1 << 0, }; struct ib_cq_caps { u16 max_cq_moderation_count; u16 max_cq_moderation_period; }; struct ib_dm_mr_attr { u64 length; u64 offset; u32 access_flags; }; struct ib_dm_alloc_attr { u64 length; u32 alignment; u32 flags; }; struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; u64 max_mr_size; u64 page_size_cap; u32 vendor_id; u32 vendor_part_id; u32 hw_ver; int max_qp; int max_qp_wr; u64 device_cap_flags; int max_send_sge; int max_recv_sge; int max_sge_rd; int max_cq; int max_cqe; int max_mr; int max_pd; int max_qp_rd_atom; int max_ee_rd_atom; int 
max_res_rd_atom; int max_qp_init_rd_atom; int max_ee_init_rd_atom; enum ib_atomic_cap atomic_cap; enum ib_atomic_cap masked_atomic_cap; int max_ee; int max_rdd; int max_mw; int max_raw_ipv6_qp; int max_raw_ethy_qp; int max_mcast_grp; int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; int max_srq; int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; unsigned int max_pi_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; int sig_prot_cap; int sig_guard_cap; struct ib_odp_caps odp_caps; uint64_t timestamp_mask; uint64_t hca_core_clock; /* in KHZ */ struct ib_rss_caps rss_caps; u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; u64 max_dm_size; /* Max entries for sgl for optimized performance per READ */ u32 max_sgl_rd; }; enum ib_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; enum opa_mtu { OPA_MTU_8192 = 6, OPA_MTU_10240 = 7 }; static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } static inline enum ib_mtu ib_mtu_int_to_enum(int mtu) { if (mtu >= 4096) return IB_MTU_4096; else if (mtu >= 2048) return IB_MTU_2048; else if (mtu >= 1024) return IB_MTU_1024; else if (mtu >= 512) return IB_MTU_512; else return IB_MTU_256; } static inline int opa_mtu_enum_to_int(enum opa_mtu mtu) { switch (mtu) { case OPA_MTU_8192: return 8192; case OPA_MTU_10240: return 10240; default: return(ib_mtu_enum_to_int((enum ib_mtu)mtu)); } } static inline enum opa_mtu opa_mtu_int_to_enum(int mtu) { if (mtu >= 10240) return OPA_MTU_10240; else if (mtu >= 8192) return OPA_MTU_8192; else return ((enum opa_mtu)ib_mtu_int_to_enum(mtu)); } enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, IB_PORT_INIT = 2, IB_PORT_ARMED = 3, IB_PORT_ACTIVE 
= 4, IB_PORT_ACTIVE_DEFER = 5 }; enum ib_port_phys_state { IB_PORT_PHYS_STATE_SLEEP = 1, IB_PORT_PHYS_STATE_POLLING = 2, IB_PORT_PHYS_STATE_DISABLED = 3, IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4, IB_PORT_PHYS_STATE_LINK_UP = 5, IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6, IB_PORT_PHYS_STATE_PHY_TEST = 7, }; enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_2X = 16, IB_WIDTH_4X = 2, IB_WIDTH_8X = 4, IB_WIDTH_12X = 8 }; static inline int ib_width_enum_to_int(enum ib_port_width width) { switch (width) { case IB_WIDTH_1X: return 1; case IB_WIDTH_2X: return 2; case IB_WIDTH_4X: return 4; case IB_WIDTH_8X: return 8; case IB_WIDTH_12X: return 12; default: return -1; } } enum ib_port_speed { IB_SPEED_SDR = 1, IB_SPEED_DDR = 2, IB_SPEED_QDR = 4, IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, IB_SPEED_EDR = 32, IB_SPEED_HDR = 64, IB_SPEED_NDR = 128, }; /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values * of counters, which are 64bits and not guaranteeed to be written * atomicaly on 32bits systems. * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. * @name - Array of pointers to static names used for the counters in * directory. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) * in their code to prevent this. 
* @value - Array of u64 counters that are accessed by the sysfs code and * filled in by the drivers get_stats routine */ struct rdma_hw_stats { struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; const char * const *names; int num_counters; u64 value[]; }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 /** * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct * for drivers. * @names - Array of static const char * * @num_counters - How many elements in array * @lifespan - How many milliseconds between updates */ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( const char * const *names, int num_counters, unsigned long lifespan) { struct rdma_hw_stats *stats; stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), GFP_KERNEL); if (!stats) return NULL; stats->names = names; stats->num_counters = num_counters; stats->lifespan = msecs_to_jiffies(lifespan); return stats; } /* Define bits for the various functionality this port needs to be supported by * the core. 
*/
/* Management                              0x00000FFF */
#define RDMA_CORE_CAP_IB_MAD 0x00000001
#define RDMA_CORE_CAP_IB_SMI 0x00000002
#define RDMA_CORE_CAP_IB_CM 0x00000004
#define RDMA_CORE_CAP_IW_CM 0x00000008
#define RDMA_CORE_CAP_IB_SA 0x00000010
#define RDMA_CORE_CAP_OPA_MAD 0x00000020

/* Address format                          0x000FF000 */
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
#define RDMA_CORE_CAP_OPA_AH 0x00004000
#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000

/* Protocol                                0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000

/*
 * Composite masks: each RDMA_CORE_PORT_* value below is the bitwise OR of
 * the RDMA_CORE_CAP_* bits listed in its definition.
 */
#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
					| RDMA_CORE_CAP_PROT_ROCE \
					| RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
				| RDMA_CORE_CAP_IB_MAD \
				| RDMA_CORE_CAP_IB_SMI \
				| RDMA_CORE_CAP_IB_CM \
				| RDMA_CORE_CAP_IB_SA \
				| RDMA_CORE_CAP_AF_IB)
#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \
				| RDMA_CORE_CAP_IB_MAD \
				| RDMA_CORE_CAP_IB_CM \
				| RDMA_CORE_CAP_AF_IB \
				| RDMA_CORE_CAP_ETH_AH)
/* Same bits as RDMA_CORE_PORT_IBA_ROCE except for the protocol bit. */
#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
				(RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
				| RDMA_CORE_CAP_IB_MAD \
				| RDMA_CORE_CAP_IB_CM \
				| RDMA_CORE_CAP_AF_IB \
				| RDMA_CORE_CAP_ETH_AH)
#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
				| RDMA_CORE_CAP_IW_CM)
/* Everything in RDMA_CORE_PORT_IBA_IB plus the OPA MAD bit. */
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
				| RDMA_CORE_CAP_OPA_MAD)
#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)

/*
 * Per-port attributes. Note that max_mtu and active_mtu hold enum ib_mtu
 * codes, while phys_mtu is a plain u32.
 */
struct ib_port_attr {
	u64 subnet_prefix;
	enum ib_port_state state;
	enum ib_mtu max_mtu;
	enum ib_mtu active_mtu;
	u32 phys_mtu;
	int gid_tbl_len;
	unsigned int ip_gids:1;
	/* This is the value from PortInfo CapabilityMask, defined by IBA */
	u32 port_cap_flags;
	u32 max_msg_sz;
	u32 bad_pkey_cntr;
	u32
qkey_viol_cntr;
	u16 pkey_tbl_len;
	u32 sm_lid;
	u32 lid;
	u8 lmc;
	u8 max_vl_num;
	u8 sm_sl;
	u8 subnet_timeout;
	u8 init_type_reply;
	u8 active_width;
	u16 active_speed;
	u8 phys_state;
	u16 port_cap_flags2;
};

/* Bit flags; each enumerator is a distinct single bit. */
enum ib_device_modify_flags {
	IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
	IB_DEVICE_MODIFY_NODE_DESC = 1 << 1
};

/* Size, in chars, of ib_device_modify.node_desc[]. */
#define IB_DEVICE_NODE_DESC_MAX 64

struct ib_device_modify {
	u64 sys_image_guid;
	char node_desc[IB_DEVICE_NODE_DESC_MAX];
};

/* Bit flags. Bit 1 (1<<1) is not assigned in this enum. */
enum ib_port_modify_flags {
	IB_PORT_SHUTDOWN = 1,
	IB_PORT_INIT_TYPE = (1<<2),
	IB_PORT_RESET_QKEY_CNTR = (1<<3),
	IB_PORT_OPA_MASK_CHG = (1<<4)
};

struct ib_port_modify {
	u32 set_port_cap_mask;
	u32 clr_port_cap_mask;
	u8 init_type;
};

/* Event codes; values are implicit and start at 0 in declaration order. */
enum ib_event_type {
	IB_EVENT_CQ_ERR,
	IB_EVENT_QP_FATAL,
	IB_EVENT_QP_REQ_ERR,
	IB_EVENT_QP_ACCESS_ERR,
	IB_EVENT_COMM_EST,
	IB_EVENT_SQ_DRAINED,
	IB_EVENT_PATH_MIG,
	IB_EVENT_PATH_MIG_ERR,
	IB_EVENT_DEVICE_FATAL,
	IB_EVENT_PORT_ACTIVE,
	IB_EVENT_PORT_ERR,
	IB_EVENT_LID_CHANGE,
	IB_EVENT_PKEY_CHANGE,
	IB_EVENT_SM_CHANGE,
	IB_EVENT_SRQ_ERR,
	IB_EVENT_SRQ_LIMIT_REACHED,
	IB_EVENT_QP_LAST_WQE_REACHED,
	IB_EVENT_CLIENT_REREGISTER,
	IB_EVENT_GID_CHANGE,
	IB_EVENT_WQ_FATAL,
};

/* Returns a string for @event; implemented outside this header. */
const char *__attribute_const__ ib_event_msg(enum ib_event_type event);

/*
 * An event raised on @device. NOTE(review): which member of @element is
 * meaningful presumably depends on @event - confirm against the producers.
 */
struct ib_event {
	struct ib_device *device;
	union {
		struct ib_cq *cq;
		struct ib_qp *qp;
		struct ib_srq *srq;
		struct ib_wq *wq;
		u32 port_num;
	} element;
	enum ib_event_type event;
};

struct ib_event_handler {
	struct ib_device *device;
	void (*handler)(struct ib_event_handler *, struct ib_event *);
	struct list_head list;
};

/*
 * Fill in an ib_event_handler: store the device and the callback and
 * initialise the list head. _device and _handler are expanded without
 * parentheses, so pass simple expressions.
 */
#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \
	do { \
		(_ptr)->device = _device; \
		(_ptr)->handler = _handler; \
		INIT_LIST_HEAD(&(_ptr)->list); \
	} while (0)

struct ib_global_route {
	const struct ib_gid_attr *sgid_attr;
	union ib_gid dgid;
	u32 flow_label;
	u8 sgid_index;
	u8 hop_limit;
	u8 traffic_class;
};

/* Multi-byte fields use the big-endian annotated types (__be32 / __be16). */
struct ib_grh {
	__be32 version_tclass_flow;
	__be16 paylen;
	u8 next_hdr;
	u8 hop_limit;
	union ib_gid sgid;
	union ib_gid dgid;
};

union rdma_network_hdr {
	struct ib_grh
ibgrh;
	struct {
		/* The IB spec states that if it's IPv4, the header
		 * is located in the last 20 bytes of the header.
		 */
		u8 reserved[20];
		struct iphdr roce4grh;
	};
};

/* Mask with the low 24 bits set. */
#define IB_QPN_MASK 0xFFFFFF

/* Same numeric value as IB_QPN_MASK. */
enum {
	IB_MULTICAST_QPN = 0xffffff
};

/* Both constants are 16-bit big-endian values (built with cpu_to_be16()). */
#define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF)
#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000)

enum ib_ah_flags {
	IB_AH_GRH = 1
};

/*
 * Rate codes. The numeric values are NOT ordered by speed (for example
 * IB_RATE_5_GBPS = 5 while IB_RATE_10_GBPS = 3), so do not compare them
 * numerically; ib_rate_to_mult() and ib_rate_to_mbps() convert a code to a
 * comparable quantity.
 */
enum ib_rate {
	IB_RATE_PORT_CURRENT = 0,
	IB_RATE_2_5_GBPS = 2,
	IB_RATE_5_GBPS = 5,
	IB_RATE_10_GBPS = 3,
	IB_RATE_20_GBPS = 6,
	IB_RATE_30_GBPS = 4,
	IB_RATE_40_GBPS = 7,
	IB_RATE_60_GBPS = 8,
	IB_RATE_80_GBPS = 9,
	IB_RATE_120_GBPS = 10,
	IB_RATE_14_GBPS = 11,
	IB_RATE_56_GBPS = 12,
	IB_RATE_112_GBPS = 13,
	IB_RATE_168_GBPS = 14,
	IB_RATE_25_GBPS = 15,
	IB_RATE_100_GBPS = 16,
	IB_RATE_200_GBPS = 17,
	IB_RATE_300_GBPS = 18,
	IB_RATE_28_GBPS = 19,
	IB_RATE_50_GBPS = 20,
	IB_RATE_400_GBPS = 21,
	IB_RATE_600_GBPS = 22,
};

/**
 * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
 * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
 * @rate: rate to convert.
 */
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);

/**
 * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
 * For example, IB_RATE_2_5_GBPS will be converted to 2500.
 * @rate: rate to convert.
*/
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);

/**
 * enum ib_mr_type - memory region type
 * @IB_MR_TYPE_MEM_REG:       memory region that is used for
 *                            normal registration
 * @IB_MR_TYPE_SG_GAPS:       memory region that is capable to
 *                            register any arbitrary sg lists (without
 *                            the normal mr constraints - see
 *                            ib_map_mr_sg)
 * @IB_MR_TYPE_DM:            memory region that is used for device
 *                            memory registration
 * @IB_MR_TYPE_USER:          memory region that is used for the user-space
 *                            application
 * @IB_MR_TYPE_DMA:           memory region that is used for DMA operations
 *                            without address translations (VA=PA)
 * @IB_MR_TYPE_INTEGRITY:     memory region that is used for
 *                            data integrity operations
 */
enum ib_mr_type {
	IB_MR_TYPE_MEM_REG,
	IB_MR_TYPE_SG_GAPS,
	IB_MR_TYPE_DM,
	IB_MR_TYPE_USER,
	IB_MR_TYPE_DMA,
	IB_MR_TYPE_INTEGRITY,
};

/* Check bits; see the fail_status bitmask of struct ib_mr_status below. */
enum ib_mr_status_check {
	IB_MR_CHECK_SIG_STATUS = 1,
};

/**
 * struct ib_mr_status - Memory region status container
 *
 * @fail_status: Bitmask of MR checks status. For each
 *     failed check a corresponding status bit is set.
 * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
 *     failure.
 */
struct ib_mr_status {
	u32 fail_status;
	struct ib_sig_err sig_err;
};

/**
 * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
 * enum.
 * @mult: multiple to convert.
*/
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);

struct rdma_ah_init_attr {
	struct rdma_ah_attr *ah_attr;
	u32 flags;
	struct net_device *xmit_slave;
};

/* Values are implicit: UNDEFINED is 0, followed by IB, ROCE and OPA. */
enum rdma_ah_attr_type {
	RDMA_AH_ATTR_TYPE_UNDEFINED,
	RDMA_AH_ATTR_TYPE_IB,
	RDMA_AH_ATTR_TYPE_ROCE,
	RDMA_AH_ATTR_TYPE_OPA,
};

struct ib_ah_attr {
	u16 dlid;
	u8 src_path_bits;
};

struct roce_ah_attr {
	u8 dmac[ETH_ALEN];
};

/* Note that dlid is u32 here, whereas struct ib_ah_attr uses a u16 dlid. */
struct opa_ah_attr {
	u32 dlid;
	u8 src_path_bits;
	bool make_grd;
};

/*
 * NOTE(review): @type presumably selects which member of the anonymous
 * union (ib / roce / opa) is valid - confirm against the users.
 */
struct rdma_ah_attr {
	struct ib_global_route grh;
	u8 sl;
	u8 static_rate;
	u32 port_num;
	u8 ah_flags;
	enum rdma_ah_attr_type type;
	union {
		struct ib_ah_attr ib;
		struct roce_ah_attr roce;
		struct opa_ah_attr opa;
	};
};

/* Completion status codes; values are implicit, IB_WC_SUCCESS is 0. */
enum ib_wc_status {
	IB_WC_SUCCESS,
	IB_WC_LOC_LEN_ERR,
	IB_WC_LOC_QP_OP_ERR,
	IB_WC_LOC_EEC_OP_ERR,
	IB_WC_LOC_PROT_ERR,
	IB_WC_WR_FLUSH_ERR,
	IB_WC_MW_BIND_ERR,
	IB_WC_BAD_RESP_ERR,
	IB_WC_LOC_ACCESS_ERR,
	IB_WC_REM_INV_REQ_ERR,
	IB_WC_REM_ACCESS_ERR,
	IB_WC_REM_OP_ERR,
	IB_WC_RETRY_EXC_ERR,
	IB_WC_RNR_RETRY_EXC_ERR,
	IB_WC_LOC_RDD_VIOL_ERR,
	IB_WC_REM_INV_RD_REQ_ERR,
	IB_WC_REM_ABORT_ERR,
	IB_WC_INV_EECN_ERR,
	IB_WC_INV_EEC_STATE_ERR,
	IB_WC_FATAL_ERR,
	IB_WC_RESP_TIMEOUT_ERR,
	IB_WC_GENERAL_ERR
};

/* Returns a string for @status; implemented outside this header. */
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);

/*
 * The first eight opcodes take their values from the IB_UVERBS_WC_*
 * constants; the three that follow are numbered implicitly after them.
 */
enum ib_wc_opcode {
	IB_WC_SEND = IB_UVERBS_WC_SEND,
	IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
	IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
	IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
	IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
	IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
	IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
	IB_WC_LSO = IB_UVERBS_WC_TSO,
	IB_WC_REG_MR,
	IB_WC_MASKED_COMP_SWAP,
	IB_WC_MASKED_FETCH_ADD,
	/*
	 * Set value of IB_WC_RECV so consumers can test if a completion is a
	 * receive by testing (opcode & IB_WC_RECV).
*/
	IB_WC_RECV = 1 << 7,
	IB_WC_RECV_RDMA_WITH_IMM
};

/* Bit flags; each enumerator is a distinct single bit. */
enum ib_wc_flags {
	IB_WC_GRH = 1,
	IB_WC_WITH_IMM = (1<<1),
	IB_WC_WITH_INVALIDATE = (1<<2),
	IB_WC_IP_CSUM_OK = (1<<3),
	IB_WC_WITH_SMAC = (1<<4),
	IB_WC_WITH_VLAN = (1<<5),
	IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6),
};

/*
 * Work completion. wr_id and wr_cqe share storage (anonymous union), as do
 * ex.imm_data and ex.invalidate_rkey.
 */
struct ib_wc {
	union {
		u64 wr_id;
		struct ib_cqe *wr_cqe;
	};
	enum ib_wc_status status;
	enum ib_wc_opcode opcode;
	u32 vendor_err;
	u32 byte_len;
	struct ib_qp *qp;
	union {
		__be32 imm_data;
		u32 invalidate_rkey;
	} ex;
	u32 src_qp;
	u32 slid;
	int wc_flags;
	u16 pkey_index;
	u8 sl;
	u8 dlid_path_bits;
	u32 port_num; /* valid only for DR SMPs on switches */
	u8 smac[ETH_ALEN];
	u16 vlan_id;
	u8 network_hdr_type;
};

/* IB_CQ_SOLICITED_MASK is the OR of IB_CQ_SOLICITED and IB_CQ_NEXT_COMP. */
enum ib_cq_notify_flags {
	IB_CQ_SOLICITED = 1 << 0,
	IB_CQ_NEXT_COMP = 1 << 1,
	IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
	IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
};

/* Values come straight from the IB_UVERBS_SRQT_* constants. */
enum ib_srq_type {
	IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
	IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
	IB_SRQT_TM = IB_UVERBS_SRQT_TM,
};

/* True for the XRC and TM SRQ types, false for every other value. */
static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
{
	return srq_type == IB_SRQT_XRC ||
	       srq_type == IB_SRQT_TM;
}

enum ib_srq_attr_mask {
	IB_SRQ_MAX_WR = 1 << 0,
	IB_SRQ_LIMIT = 1 << 1,
};

struct ib_srq_attr {
	u32 max_wr;
	u32 max_sge;
	u32 srq_limit;
};

/* ext.xrc and ext.tag_matching share storage (anonymous union). */
struct ib_srq_init_attr {
	void (*event_handler)(struct ib_event *, void *);
	void *srq_context;
	struct ib_srq_attr attr;
	enum ib_srq_type srq_type;

	struct {
		struct ib_cq *cq;
		union {
			struct {
				struct ib_xrcd *xrcd;
			} xrc;

			struct {
				u32 max_num_tags;
			} tag_matching;
		};
	} ext;
};

struct ib_qp_cap {
	u32 max_send_wr;
	u32 max_recv_wr;
	u32 max_send_sge;
	u32 max_recv_sge;
	u32 max_inline_data;

	/*
	 * Maximum number of rdma_rw_ctx structures in flight at a time.
	 * ib_create_qp() will calculate the right amount of needed WRs
	 * and MRs based on this.
	 */
	u32 max_rdma_ctxs;
};

enum ib_sig_type {
	IB_SIGNAL_ALL_WR,
	IB_SIGNAL_REQ_WR
};

enum ib_qp_type {
	/*
	 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
	 * here (and in that order) since the MAD layer uses them as
	 * indices into a 2-entry table.
	 */
	IB_QPT_SMI,
	IB_QPT_GSI,

	IB_QPT_RC = IB_UVERBS_QPT_RC,
	IB_QPT_UC = IB_UVERBS_QPT_UC,
	IB_QPT_UD = IB_UVERBS_QPT_UD,
	IB_QPT_RAW_IPV6,
	IB_QPT_RAW_ETHERTYPE,
	IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
	IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
	IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
	IB_QPT_MAX,
	IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
	/* Reserve a range for qp types internal to the low level driver.
	 * These qp types will not be visible at the IB core layer, so the
	 * IB_QPT_MAX usages should not be affected in the core layer
	 */
	IB_QPT_RESERVED1 = 0x1000,
	IB_QPT_RESERVED2,
	IB_QPT_RESERVED3,
	IB_QPT_RESERVED4,
	IB_QPT_RESERVED5,
	IB_QPT_RESERVED6,
	IB_QPT_RESERVED7,
	IB_QPT_RESERVED8,
	IB_QPT_RESERVED9,
	IB_QPT_RESERVED10,
};

/*
 * Bit flags. Some enumerators are written as explicit shifts and others are
 * taken from the IB_UVERBS_QP_CREATE_* constants.
 */
enum ib_qp_create_flags {
	IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK =
		IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
	IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
	IB_QP_CREATE_MANAGED_SEND = 1 << 3,
	IB_QP_CREATE_MANAGED_RECV = 1 << 4,
	IB_QP_CREATE_NETIF_QP = 1 << 5,
	IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
	IB_QP_CREATE_NETDEV_USE = 1 << 7,
	IB_QP_CREATE_SCATTER_FCS =
		IB_UVERBS_QP_CREATE_SCATTER_FCS,
	IB_QP_CREATE_CVLAN_STRIPPING =
		IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
	IB_QP_CREATE_SOURCE_QPN = 1 << 10,
	IB_QP_CREATE_PCI_WRITE_END_PADDING =
		IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
	/* reserve bits 26-31 for low level drivers' internal use */
	IB_QP_CREATE_RESERVED_START = 1 << 26,
	/* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int. */
	IB_QP_CREATE_RESERVED_END = 1 << 31,
};

/*
 * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
 * callback to destroy the passed in QP.
*/

struct ib_qp_init_attr {
	/* Consumer's event_handler callback must not block */
	void (*event_handler)(struct ib_event *, void *);

	void *qp_context;
	struct ib_cq *send_cq;
	struct ib_cq *recv_cq;
	struct ib_srq *srq;
	struct ib_xrcd *xrcd; /* XRC TGT QPs only */
	struct ib_qp_cap cap;
	enum ib_sig_type sq_sig_type;
	enum ib_qp_type qp_type;
	u32 create_flags;

	/*
	 * Only needed for special QP types, or when using the RW API.
	 */
	u32 port_num;
	struct ib_rwq_ind_table *rwq_ind_tbl;
	u32 source_qpn;
};

struct ib_qp_open_attr {
	void (*event_handler)(struct ib_event *, void *);
	void *qp_context;
	u32 qp_num;
	enum ib_qp_type qp_type;
};

/*
 * Timer codes 0..31. NOTE(review): the names look like they encode a delay
 * (e.g. IB_RNR_TIMER_000_01, IB_RNR_TIMER_655_36) - confirm the unit against
 * the IBA specification before relying on it.
 */
enum ib_rnr_timeout {
	IB_RNR_TIMER_655_36 = 0,
	IB_RNR_TIMER_000_01 = 1,
	IB_RNR_TIMER_000_02 = 2,
	IB_RNR_TIMER_000_03 = 3,
	IB_RNR_TIMER_000_04 = 4,
	IB_RNR_TIMER_000_06 = 5,
	IB_RNR_TIMER_000_08 = 6,
	IB_RNR_TIMER_000_12 = 7,
	IB_RNR_TIMER_000_16 = 8,
	IB_RNR_TIMER_000_24 = 9,
	IB_RNR_TIMER_000_32 = 10,
	IB_RNR_TIMER_000_48 = 11,
	IB_RNR_TIMER_000_64 = 12,
	IB_RNR_TIMER_000_96 = 13,
	IB_RNR_TIMER_001_28 = 14,
	IB_RNR_TIMER_001_92 = 15,
	IB_RNR_TIMER_002_56 = 16,
	IB_RNR_TIMER_003_84 = 17,
	IB_RNR_TIMER_005_12 = 18,
	IB_RNR_TIMER_007_68 = 19,
	IB_RNR_TIMER_010_24 = 20,
	IB_RNR_TIMER_015_36 = 21,
	IB_RNR_TIMER_020_48 = 22,
	IB_RNR_TIMER_030_72 = 23,
	IB_RNR_TIMER_040_96 = 24,
	IB_RNR_TIMER_061_44 = 25,
	IB_RNR_TIMER_081_92 = 26,
	IB_RNR_TIMER_122_88 = 27,
	IB_RNR_TIMER_163_84 = 28,
	IB_RNR_TIMER_245_76 = 29,
	IB_RNR_TIMER_327_68 = 30,
	IB_RNR_TIMER_491_52 = 31
};

/* Bit flags; each enumerator listed here is a distinct single bit. */
enum ib_qp_attr_mask {
	IB_QP_STATE = 1,
	IB_QP_CUR_STATE = (1<<1),
	IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2),
	IB_QP_ACCESS_FLAGS = (1<<3),
	IB_QP_PKEY_INDEX = (1<<4),
	IB_QP_PORT = (1<<5),
	IB_QP_QKEY = (1<<6),
	IB_QP_AV = (1<<7),
	IB_QP_PATH_MTU = (1<<8),
	IB_QP_TIMEOUT = (1<<9),
	IB_QP_RETRY_CNT = (1<<10),
	IB_QP_RNR_RETRY = (1<<11),
	IB_QP_RQ_PSN = (1<<12),
	IB_QP_MAX_QP_RD_ATOMIC = (1<<13),
	IB_QP_ALT_PATH = (1<<14),
	IB_QP_MIN_RNR_TIMER = (1<<15),
	IB_QP_SQ_PSN = (1<<16),
	IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
	IB_QP_PATH_MIG_STATE =
(1<<18),
	IB_QP_CAP = (1<<19),
	IB_QP_DEST_QPN = (1<<20),
	IB_QP_RESERVED1 = (1<<21),
	IB_QP_RESERVED2 = (1<<22),
	IB_QP_RESERVED3 = (1<<23),
	IB_QP_RESERVED4 = (1<<24),
	IB_QP_RATE_LIMIT = (1<<25),

	/*
	 * GENMASK(20, 0): bits 0 through 20, i.e. up to and including
	 * IB_QP_DEST_QPN. The RESERVED bits and IB_QP_RATE_LIMIT are outside
	 * this mask.
	 */
	IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
};

/* Values are implicit: IB_QPS_RESET is 0 and IB_QPS_ERR is 6. */
enum ib_qp_state {
	IB_QPS_RESET,
	IB_QPS_INIT,
	IB_QPS_RTR,
	IB_QPS_RTS,
	IB_QPS_SQD,
	IB_QPS_SQE,
	IB_QPS_ERR
};

enum ib_mig_state {
	IB_MIG_MIGRATED,
	IB_MIG_REARM,
	IB_MIG_ARMED
};

/* Numbering starts at 1; 0 is not a defined type in this enum. */
enum ib_mw_type {
	IB_MW_TYPE_1 = 1,
	IB_MW_TYPE_2 = 2
};

/*
 * QP attributes. NOTE(review): the fields appear to pair up with the
 * enum ib_qp_attr_mask bits (e.g. qp_state / IB_QP_STATE) - confirm against
 * the modify/query QP callers.
 */
struct ib_qp_attr {
	enum ib_qp_state qp_state;
	enum ib_qp_state cur_qp_state;
	enum ib_mtu path_mtu;
	enum ib_mig_state path_mig_state;
	u32 qkey;
	u32 rq_psn;
	u32 sq_psn;
	u32 dest_qp_num;
	int qp_access_flags;
	struct ib_qp_cap cap;
	struct rdma_ah_attr ah_attr;
	struct rdma_ah_attr alt_ah_attr;
	u16 pkey_index;
	u16 alt_pkey_index;
	u8 en_sqd_async_notify;
	u8 sq_draining;
	u8 max_rd_atomic;
	u8 max_dest_rd_atomic;
	u8 min_rnr_timer;
	u32 port_num;
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
	u32 alt_port_num;
	u8 alt_timeout;
	u32 rate_limit;
	struct net_device *xmit_slave;
};

enum ib_wr_opcode {
	/* These are shared with userspace */
	IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
	IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
	IB_WR_SEND = IB_UVERBS_WR_SEND,
	IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
	IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
	IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
	IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
	IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
	IB_WR_LSO = IB_UVERBS_WR_TSO,
	IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
	IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
	IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
	IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,

	/* These are kernel only and can not be issued by userspace */
	IB_WR_REG_MR = 0x20,
	IB_WR_REG_MR_INTEGRITY,

	/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
	 */
	IB_WR_RESERVED1 = 0xf0,
	IB_WR_RESERVED2,
	IB_WR_RESERVED3,
	IB_WR_RESERVED4,
	IB_WR_RESERVED5,
	IB_WR_RESERVED6,
	IB_WR_RESERVED7,
	IB_WR_RESERVED8,
	IB_WR_RESERVED9,
	IB_WR_RESERVED10,
};

/* Bit flags; each enumerator is a distinct single bit. */
enum ib_send_flags {
	IB_SEND_FENCE = 1,
	IB_SEND_SIGNALED = (1<<1),
	IB_SEND_SOLICITED = (1<<2),
	IB_SEND_INLINE = (1<<3),
	IB_SEND_IP_CSUM = (1<<4),

	/* reserve bits 26-31 for low level drivers' internal use */
	IB_SEND_RESERVED_START = (1 << 26),
	/* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int. */
	IB_SEND_RESERVED_END = (1 << 31),
};

struct ib_sge {
	u64 addr;
	u32 length;
	u32 lkey;
};

/* Holds a single callback taking the CQ and a work completion. */
struct ib_cqe {
	void (*done)(struct ib_cq *cq, struct ib_wc *wc);
};

/*
 * Base send work request, embedded as the member 'wr' in the specialised
 * request structures below. wr_id and wr_cqe share storage, as do
 * ex.imm_data and ex.invalidate_rkey.
 */
struct ib_send_wr {
	struct ib_send_wr *next;
	union {
		u64 wr_id;
		struct ib_cqe *wr_cqe;
	};
	struct ib_sge *sg_list;
	int num_sge;
	enum ib_wr_opcode opcode;
	int send_flags;
	union {
		__be32 imm_data;
		u32 invalidate_rkey;
	} ex;
};

struct ib_rdma_wr {
	struct ib_send_wr wr;
	u64 remote_addr;
	u32 rkey;
};

/*
 * Recover the enclosing struct ib_rdma_wr from a pointer to its 'wr' member.
 * Only meaningful when @wr really is embedded in an ib_rdma_wr.
 */
static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_rdma_wr, wr);
}

struct ib_atomic_wr {
	struct ib_send_wr wr;
	u64 remote_addr;
	u64 compare_add;
	u64 swap;
	u64 compare_add_mask;
	u64 swap_mask;
	u32 rkey;
};

/*
 * Recover the enclosing struct ib_atomic_wr from a pointer to its 'wr'
 * member. Only meaningful when @wr really is embedded in an ib_atomic_wr.
 */
static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_atomic_wr, wr);
}

struct ib_ud_wr {
	struct ib_send_wr wr;
	struct ib_ah *ah;
	void *header;
	int hlen;
	int mss;
	u32 remote_qpn;
	u32 remote_qkey;
	u16 pkey_index; /* valid for GSI only */
	u32 port_num; /* valid for DR SMPs on switch only */
};

/*
 * Recover the enclosing struct ib_ud_wr from a pointer to its 'wr' member.
 * Only meaningful when @wr really is embedded in an ib_ud_wr.
 */
static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_ud_wr, wr);
}

struct ib_reg_wr {
	struct ib_send_wr wr;
	struct ib_mr *mr;
	u32 key;
	int access;
};

/*
 * Recover the enclosing struct ib_reg_wr from a pointer to its 'wr' member.
 * Only meaningful when @wr really is embedded in an ib_reg_wr.
 */
static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_reg_wr, wr);
}

struct ib_recv_wr {
	struct ib_recv_wr *next;
	union {
		u64 wr_id;
		struct ib_cqe *wr_cqe;
	};
	struct ib_sge
*sg_list;
	int num_sge;
};

/*
 * Access flags; every individual flag takes its value from the matching
 * IB_UVERBS_ACCESS_* constant.
 */
enum ib_access_flags {
	IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
	IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
	IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
	IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
	IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,

	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
	/*
	 * ((IB_ACCESS_HUGETLB << 1) - 1) sets every bit at or below
	 * IB_ACCESS_HUGETLB (assuming that flag is a single bit); the
	 * optional range is then OR-ed in.
	 */
	IB_ACCESS_SUPPORTED =
		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
};

/*
 * XXX: these are apparently used for ->rereg_user_mr, no idea why they
 * are hidden here instead of a uapi header!
 */
enum ib_mr_rereg_flags {
	IB_MR_REREG_TRANS = 1,
	IB_MR_REREG_PD = (1<<1),
	IB_MR_REREG_ACCESS = (1<<2),
	/* Evaluates to 7, i.e. the OR of the three flags above. */
	IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
};

/* Forward declaration only; the definition is not in this part of the file. */
struct ib_umem;

enum rdma_remove_reason {
	/*
	 * Userspace requested uobject deletion or initial try
	 * to remove uobject via cleanup. Call could fail
	 */
	RDMA_REMOVE_DESTROY,
	/* Context deletion. This call should delete the actual object itself */
	RDMA_REMOVE_CLOSE,
	/* Driver is being hot-unplugged.
This call should delete the actual object itself */
	RDMA_REMOVE_DRIVER_REMOVE,
	/* uobj is being cleaned-up before being committed */
	RDMA_REMOVE_ABORT,
	/* The driver failed to destroy the uobject and is being disconnected */
	RDMA_REMOVE_DRIVER_FAILURE,
};

/* Has no members unless CONFIG_CGROUP_RDMA is defined. */
struct ib_rdmacg_object {
#ifdef CONFIG_CGROUP_RDMA
	struct rdma_cgroup *cg; /* owner rdma cgroup */
#endif
};

struct ib_ucontext {
	struct ib_device *device;
	struct ib_uverbs_file *ufile;

	struct ib_rdmacg_object cg_obj;
	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct rdma_restrack_entry res;
	struct xarray mmap_xa;
};

struct ib_uobject {
	u64 user_handle; /* handle given to us by userspace */
	/* ufile & ucontext owning this object */
	struct ib_uverbs_file *ufile;
	/* FIXME, save memory: ufile->context == context */
	struct ib_ucontext *context; /* associated user context */
	void *object; /* containing object */
	struct list_head list; /* link to context's list */
	struct ib_rdmacg_object cg_obj; /* rdmacg object */
	int id; /* index into kernel idr */
	struct kref ref;
	atomic_t usecnt; /* protects exclusive access */
	struct rcu_head rcu; /* kfree_rcu() overhead */

	const struct uverbs_api_object *uapi_object;
};

/*
 * Describes a pair of user-space buffers (both pointers are __user): an
 * input buffer of inlen bytes and an output buffer of outlen bytes.
 */
struct ib_udata {
	const void __user *inbuf;
	void __user *outbuf;
	size_t inlen;
	size_t outlen;
};

struct ib_pd {
	u32 local_dma_lkey;
	u32 flags;
	struct ib_device *device;
	struct ib_uobject *uobject;
	atomic_t usecnt; /* count all resources */

	u32 unsafe_global_rkey;

	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct ib_mr *__internal_mr;
	struct rdma_restrack_entry res;
};

struct ib_xrcd {
	struct ib_device *device;
	atomic_t usecnt; /* count all exposed resources */
	struct inode *inode;
	struct rw_semaphore tgt_qps_rwsem;
	struct xarray tgt_qps;
};

struct ib_ah {
	struct ib_device *device;
	struct ib_pd *pd;
	struct ib_uobject *uobject;
	const struct ib_gid_attr *sgid_attr;
	enum rdma_ah_attr_type type;
};

typedef void (*ib_comp_handler)(struct ib_cq *cq, void
*cq_context);

enum ib_poll_context {
	IB_POLL_SOFTIRQ, /* poll from softirq context */
	IB_POLL_WORKQUEUE, /* poll from workqueue */
	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
	/* Alias for the enumerator above; IB_POLL_DIRECT gets the next value. */
	IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,

	IB_POLL_DIRECT, /* caller context, no hw completions */
};

/*
 * Completion queue. 'iop' and 'work' share storage (anonymous union).
 * NOTE(review): which of the two is live presumably follows poll_ctx -
 * confirm against the CQ core code.
 */
struct ib_cq {
	struct ib_device *device;
	struct ib_ucq_object *uobject;
	ib_comp_handler comp_handler;
	void (*event_handler)(struct ib_event *, void *);
	void *cq_context;
	int cqe;
	unsigned int cqe_used;
	atomic_t usecnt; /* count number of work queues */
	enum ib_poll_context poll_ctx;
	struct ib_wc *wc;
	struct list_head pool_entry;
	union {
		struct irq_poll iop;
		struct work_struct work;
	};
	struct workqueue_struct *comp_wq;
	struct dim *dim;

	/* updated only by trace points */
	ktime_t timestamp;
	u8 interrupt:1;
	u8 shared:1;
	unsigned int comp_vector;

	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct rdma_restrack_entry res;
};

struct ib_srq {
	struct ib_device *device;
	struct ib_pd *pd;
	struct ib_usrq_object *uobject;
	void (*event_handler)(struct ib_event *, void *);
	void *srq_context;
	enum ib_srq_type srq_type;
	atomic_t usecnt;

	struct {
		struct ib_cq *cq;
		union {
			struct {
				struct ib_xrcd *xrcd;
				u32 srq_num;
			} xrc;
		};
	} ext;

	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct rdma_restrack_entry res;
};

/* Capability bits; each enumerator is a distinct single bit. */
enum ib_raw_packet_caps {
	/* Strip cvlan from incoming packet and report it in the matching work
	 * completion is supported.
	 */
	IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
	/* Scatter FCS field of an incoming packet to host memory is supported.
	 */
	IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
	/* Checksum offloads are supported (for both send and receive). */
	IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
	/* When a packet is received for an RQ with no receive WQEs, the
	 * packet processing is delayed.
*/
	IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3),
};

/* Only one work queue type is defined here. */
enum ib_wq_type {
	IB_WQT_RQ = IB_UVERBS_WQT_RQ,
};

enum ib_wq_state {
	IB_WQS_RESET,
	IB_WQS_RDY,
	IB_WQS_ERR
};

struct ib_wq {
	struct ib_device *device;
	struct ib_uwq_object *uobject;
	void *wq_context;
	void (*event_handler)(struct ib_event *, void *);
	struct ib_pd *pd;
	struct ib_cq *cq;
	u32 wq_num;
	enum ib_wq_state state;
	enum ib_wq_type wq_type;
	atomic_t usecnt;
};

/* Values come straight from the IB_UVERBS_WQ_FLAGS_* constants. */
enum ib_wq_flags {
	IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
	IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
	IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP,
	IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
		IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
};

struct ib_wq_init_attr {
	void *wq_context;
	enum ib_wq_type wq_type;
	u32 max_wr;
	u32 max_sge;
	struct ib_cq *cq;
	void (*event_handler)(struct ib_event *, void *);
	u32 create_flags; /* Use enum ib_wq_flags */
};

/* Bit flags; each enumerator is a distinct single bit. */
enum ib_wq_attr_mask {
	IB_WQ_STATE = 1 << 0,
	IB_WQ_CUR_STATE = 1 << 1,
	IB_WQ_FLAGS = 1 << 2,
};

struct ib_wq_attr {
	enum ib_wq_state wq_state;
	enum ib_wq_state curr_wq_state;
	u32 flags; /* Use enum ib_wq_flags */
	u32 flags_mask; /* Use enum ib_wq_flags */
};

struct ib_rwq_ind_table {
	struct ib_device *device;
	struct ib_uobject *uobject;
	atomic_t usecnt;
	u32 ind_tbl_num;
	u32 log_ind_tbl_size;
	struct ib_wq **ind_tbl;
};

struct ib_rwq_ind_table_init_attr {
	u32 log_ind_tbl_size;
	/* Each entry is a pointer to Receive Work Queue */
	struct ib_wq **ind_tbl;
};

enum port_pkey_state {
	IB_PORT_PKEY_NOT_VALID = 0,
	IB_PORT_PKEY_VALID = 1,
	IB_PORT_PKEY_LISTED = 2,
};

/* Forward declaration: struct ib_port_pkey below points back to it. */
struct ib_qp_security;

struct ib_port_pkey {
	enum port_pkey_state state;
	u16 pkey_index;
	u32 port_num;
	struct list_head qp_list;
	struct list_head to_error_list;
	struct ib_qp_security *sec;
};

/* One ib_port_pkey record for the main path and one for the alternate. */
struct ib_ports_pkeys {
	struct ib_port_pkey main;
	struct ib_port_pkey alt;
};

struct ib_qp_security {
	struct ib_qp *qp;
	struct ib_device *dev;
	/* Hold this mutex when changing port and pkey settings.
*/
	struct mutex mutex;
	struct ib_ports_pkeys *ports_pkeys;
	/* A list of all open shared QP handles.  Required to enforce security
	 * properly for all users of a shared QP.
	 */
	struct list_head shared_qp_list;
	void *security;
	bool destroying;
	atomic_t error_list_count;
	struct completion error_complete;
	int error_comps_pending;
};

/*
 * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
 * @max_read_sge:  Maximum SGE elements per RDMA READ request.
 */
struct ib_qp {
	struct ib_device *device;
	struct ib_pd *pd;
	struct ib_cq *send_cq;
	struct ib_cq *recv_cq;
	spinlock_t mr_lock;
	int mrs_used;
	struct list_head rdma_mrs;
	struct list_head sig_mrs;
	struct ib_srq *srq;
	struct ib_xrcd *xrcd; /* XRC TGT QPs only */
	struct list_head xrcd_list;

	/* count times opened, mcast attaches, flow attaches */
	atomic_t usecnt;
	struct list_head open_list;
	struct ib_qp *real_qp;
	struct ib_uqp_object *uobject;
	void (*event_handler)(struct ib_event *, void *);
	void *qp_context;
	/* sgid_attrs associated with the AV's */
	const struct ib_gid_attr *av_sgid_attr;
	const struct ib_gid_attr *alt_path_sgid_attr;
	u32 qp_num;
	u32 max_write_sge;
	u32 max_read_sge;
	enum ib_qp_type qp_type;
	struct ib_rwq_ind_table *rwq_ind_tbl;
	struct ib_qp_security *qp_sec;
	u32 port;

	bool integrity_en;
	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct rdma_restrack_entry res;

	/* The counter the qp is bound to */
	struct rdma_counter *counter;
};

struct ib_dm {
	struct ib_device *device;
	u32 length;
	u32 flags;
	struct ib_uobject *uobject;
	atomic_t usecnt;
};

/*
 * Memory region. 'uobject' and 'qp_entry' share storage (anonymous union),
 * so only one of them can be in use for a given MR.
 */
struct ib_mr {
	struct ib_device *device;
	struct ib_pd *pd;
	u32 lkey;
	u32 rkey;
	u64 iova;
	u64 length;
	unsigned int page_size;
	enum ib_mr_type type;
	bool need_inval;
	union {
		struct ib_uobject *uobject; /* user */
		struct list_head qp_entry; /* FR */
	};

	struct ib_dm *dm;
	struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
	struct rdma_restrack_entry res;
};
struct ib_mw { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; enum ib_mw_type type; }; /* Supported steering options */ enum ib_flow_attr_type { /* steering according to rule specifications */ IB_FLOW_ATTR_NORMAL = 0x0, /* default unicast and multicast rule - * receive all Eth traffic which isn't steered to any QP */ IB_FLOW_ATTR_ALL_DEFAULT = 0x1, /* default multicast rule - * receive all Eth multicast traffic which isn't steered to any QP */ IB_FLOW_ATTR_MC_DEFAULT = 0x2, /* sniffer rule - receive all port traffic */ IB_FLOW_ATTR_SNIFFER = 0x3 }; /* Supported steering header types */ enum ib_flow_spec_type { /* L2 headers */ IB_FLOW_SPEC_ETH = 0x20, IB_FLOW_SPEC_IB = 0x22, /* L3 headers */ IB_FLOW_SPEC_IPV4 = 0x30, IB_FLOW_SPEC_IPV6 = 0x31, IB_FLOW_SPEC_ESP = 0x34, /* L4 headers */ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41, IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50, IB_FLOW_SPEC_GRE = 0x51, IB_FLOW_SPEC_MPLS = 0x60, IB_FLOW_SPEC_INNER = 0x100, /* Actions */ IB_FLOW_SPEC_ACTION_TAG = 0x1000, IB_FLOW_SPEC_ACTION_DROP = 0x1001, IB_FLOW_SPEC_ACTION_HANDLE = 0x1002, IB_FLOW_SPEC_ACTION_COUNT = 0x1003, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 10 enum ib_flow_flags { IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */ IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 3 /* Must be last */ }; struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_eth { u32 type; u16 size; struct ib_flow_eth_filter val; struct ib_flow_eth_filter mask; }; struct ib_flow_ib_filter { __be16 dlid; __u8 sl; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_ib { u32 type; u16 size; struct ib_flow_ib_filter val; struct ib_flow_ib_filter mask; }; /* IPv4 header flags */ enum ib_ipv4_flags { IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */ IB_IPV4_MORE_FRAG = 
0X4 /* Set on every fragment of a fragmented packet except the last */ }; struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; u8 proto; u8 tos; u8 ttl; u8 flags; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_ipv4 { u32 type; u16 size; struct ib_flow_ipv4_filter val; struct ib_flow_ipv4_filter mask; }; struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; __be32 flow_label; u8 next_hdr; u8 traffic_class; u8 hop_limit; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_ipv6 { u32 type; u16 size; struct ib_flow_ipv6_filter val; struct ib_flow_ipv6_filter mask; }; struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_tcp_udp { u32 type; u16 size; struct ib_flow_tcp_udp_filter val; struct ib_flow_tcp_udp_filter mask; }; struct ib_flow_tunnel_filter { __be32 tunnel_id; /* Must be last */ u8 real_sz[]; }; /* ib_flow_spec_tunnel describes the Vxlan tunnel * the tunnel_id from val has the vni value */ struct ib_flow_spec_tunnel { u32 type; u16 size; struct ib_flow_tunnel_filter val; struct ib_flow_tunnel_filter mask; }; struct ib_flow_esp_filter { __be32 spi; __be32 seq; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_esp { u32 type; u16 size; struct ib_flow_esp_filter val; struct ib_flow_esp_filter mask; }; struct ib_flow_gre_filter { __be16 c_ks_res0_ver; __be16 protocol; __be32 key; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_gre { u32 type; u16 size; struct ib_flow_gre_filter val; struct ib_flow_gre_filter mask; }; struct ib_flow_mpls_filter { __be32 tag; /* Must be last */ u8 real_sz[]; }; struct ib_flow_spec_mpls { u32 type; u16 size; struct ib_flow_mpls_filter val; struct ib_flow_mpls_filter mask; }; struct ib_flow_spec_action_tag { enum ib_flow_spec_type type; u16 size; u32 tag_id; }; struct ib_flow_spec_action_drop { enum ib_flow_spec_type type; u16 size; }; struct ib_flow_spec_action_handle { enum ib_flow_spec_type type; u16 size; struct ib_flow_action *act; }; enum 
ib_counters_description { IB_COUNTER_PACKETS, IB_COUNTER_BYTES, }; struct ib_flow_spec_action_count { enum ib_flow_spec_type type; u16 size; struct ib_counters *counters; }; /* One flow spec of any kind; the anonymous struct names the leading * type/size pair that every member struct also starts with. */ union ib_flow_spec { struct { u32 type; u16 size; }; struct ib_flow_spec_eth eth; struct ib_flow_spec_ib ib; struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; struct ib_flow_spec_ipv6 ipv6; struct ib_flow_spec_tunnel tunnel; struct ib_flow_spec_esp esp; struct ib_flow_spec_gre gre; struct ib_flow_spec_mpls mpls; struct ib_flow_spec_action_tag flow_tag; struct ib_flow_spec_action_drop drop; struct ib_flow_spec_action_handle action; struct ib_flow_spec_action_count flow_count; }; struct ib_flow_attr { enum ib_flow_attr_type type; u16 size; u16 priority; u32 flags; u8 num_of_specs; u32 port; union ib_flow_spec flows[]; }; struct ib_flow { struct ib_qp *qp; struct ib_device *device; struct ib_uobject *uobject; }; enum ib_flow_action_type { IB_FLOW_ACTION_UNSPECIFIED, IB_FLOW_ACTION_ESP = 1, }; struct ib_flow_action_attrs_esp_keymats { enum ib_uverbs_flow_action_esp_keymat protocol; union { struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm; } keymat; }; struct ib_flow_action_attrs_esp_replays { enum ib_uverbs_flow_action_esp_replay protocol; union { struct ib_uverbs_flow_action_esp_replay_bmp bmp; } replay; }; enum ib_flow_action_attrs_esp_flags { /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags * This is done in order to share the same flags between user-space and * kernel and spare an unnecessary translation. 
*/ /* Kernel flags */ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32, IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33, }; /* Singly linked list node carrying one flow spec. */ struct ib_flow_spec_list { struct ib_flow_spec_list *next; union ib_flow_spec spec; }; struct ib_flow_action_attrs_esp { struct ib_flow_action_attrs_esp_keymats *keymat; struct ib_flow_action_attrs_esp_replays *replay; struct ib_flow_spec_list *encap; /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled. * Value of 0 is a valid value. */ u32 esn; u32 spi; u32 seq; u32 tfc_pad; /* Use enum ib_flow_action_attrs_esp_flags */ u64 flags; u64 hard_limit_pkts; }; struct ib_flow_action { struct ib_device *device; struct ib_uobject *uobject; enum ib_flow_action_type type; atomic_t usecnt; }; struct ib_mad; enum ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, IB_MAD_IGNORE_BKEY = 2, IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY }; enum ib_mad_result { IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ }; struct ib_port_cache { u64 subnet_prefix; struct ib_pkey_cache *pkey; struct ib_gid_table *gid; u8 lmc; enum ib_port_state port_state; }; struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; u32 core_cap_flags; u32 max_mad_size; }; struct ib_port_data { struct ib_device *ib_dev; struct ib_port_immutable immutable; spinlock_t pkey_list_lock; spinlock_t netdev_lock; struct list_head pkey_list; struct ib_port_cache cache; struct net_device __rcu *netdev; struct hlist_node ndev_hash_link; struct rdma_port_counter port_counter; struct ib_port *sysfs; }; /* rdma netdev type - specifies protocol type */ enum rdma_netdev_t { RDMA_NETDEV_OPA_VNIC, RDMA_NETDEV_IPOIB, }; /** * struct rdma_netdev - rdma netdev * For cases where netstack interfacing is required. 
*/ struct rdma_netdev { void *clnt_priv; struct ib_device *hca; u32 port_num; int mtu; /* * cleanup function must be specified. * FIXME: This is only used for OPA_VNIC and that usage should be * removed too. */ void (*free_rdma_netdev)(struct net_device *netdev); /* control functions */ void (*set_id)(struct net_device *netdev, int id); /* send packet */ int (*send)(struct net_device *dev, struct sk_buff *skb, struct ib_ah *address, u32 dqpn); /* multicast */ int (*attach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid, int set_qkey, u32 qkey); int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid); /* timeout */ void (*tx_timeout)(struct net_device *dev, unsigned int txqueue); }; struct rdma_netdev_alloc_params { size_t sizeof_priv; unsigned int txqs; unsigned int rxqs; void *param; int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num, struct net_device *netdev, void *param); }; struct ib_odp_counters { atomic64_t faults; atomic64_t invalidations; atomic64_t prefetch; }; struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; /* num of objects attached */ atomic_t usecnt; }; struct ib_counters_read_attr { u64 *counters_buff; u32 ncounters; u32 flags; /* use enum ib_read_counters_flags */ }; struct uverbs_attr_bundle; struct iw_cm_id; struct iw_cm_conn_param; /* Designated initializer for the size_<ib_struct> field: evaluates to * sizeof(struct drv_struct) and fails the build unless @member sits at * offset 0 of struct drv_struct and has type struct ib_struct. */ #define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ .size_##ib_struct = \ (sizeof(struct drv_struct) + \ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \ BUILD_BUG_ON_ZERO( \ !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) /* The rdma_zalloc_drv_obj* macros call rdma_zalloc_obj() with the size the * driver stored in ib_dev->ops.size_<ib_type> and cast the result to * struct ib_type *; the _numa variant passes is_numa_aware = true. */ #define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ gfp, false)) #define rdma_zalloc_drv_obj_numa(ib_dev, ib_type) \ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ GFP_KERNEL, true)) #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ 
rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct struct rdma_user_mmap_entry { struct kref ref; struct ib_ucontext *ucontext; unsigned long start_pgoff; size_t npages; bool driver_removed; }; /* Return the offset (in bytes) the user should pass to libc's mmap(): * entry->start_pgoff widened to u64 and shifted left by PAGE_SHIFT. */ static inline u64 rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) { return (u64)entry->start_pgoff << PAGE_SHIFT; } /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations. Providers need * to define the operations they support; any operation left undefined stays * NULL. */ struct ib_device_ops { struct module *owner; enum rdma_driver_id driver_id; u32 uverbs_abi_ver; unsigned int uverbs_no_driver_id_binding:1; /* * NOTE: New drivers should not make use of device_group; instead new * device parameter should be exposed via netlink command. This * mechanism exists only for existing drivers. 
*/ const struct attribute_group *device_group; const struct attribute_group **port_groups; int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); void (*drain_rq)(struct ib_qp *qp); void (*drain_sq)(struct ib_qp *qp); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); int (*post_srq_recv)(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); /* NOTE(review): process_mad_flags is presumably a mask of * enum ib_process_mad_flags - confirm against the callers. */ int (*process_mad)(struct ib_device *device, int process_mad_flags, u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in_mad, struct ib_mad *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); int (*modify_device)(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); void (*get_dev_fw_str)(struct ib_device *device, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); int (*query_port)(struct ib_device *device, u32 port_num, struct ib_port_attr *port_attr); int (*modify_port)(struct ib_device *device, u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); /** * The following mandatory functions are used only at device * registration. Keep functions such as these at the end of this * structure to avoid cache line misses when accessing struct ib_device * in fast paths. 
*/ int (*get_port_immutable)(struct ib_device *device, u32 port_num, struct ib_port_immutable *immutable); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, u32 port_num); /** * When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such * a net device doesn't exist. The vendor driver should call dev_hold * on this net device. The HW vendor's device driver must guarantee * that this function returns NULL before the net device has finished * NETDEV_UNREGISTER state. */ struct net_device *(*get_netdev)(struct ib_device *device, u32 port_num); /** * rdma netdev operation * * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params * must return -EOPNOTSUPP if it doesn't support the specified type. */ struct net_device *(*alloc_rdma_netdev)( struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params); /** * query_gid should return the GID value for @device when the * @port_num link layer is either IB or iWarp. It is a no-op if the * @port_num port has a RoCE link layer. */ int (*query_gid)(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); /** * When calling add_gid, the HW vendor's driver should add the gid * of device of port at gid index available at @attr. Meta-info of * that gid (for example, the network device related to this gid) is * available at @attr. @context allows the HW vendor driver to store * extra information together with a GID entry. The HW vendor driver may * allocate memory to contain this information and store it in @context * when a new GID entry is written to. Params are consistent until the * next call of add_gid or delete_gid. The function should return 0 on * success or error otherwise. 
The function could be called * concurrently for different ports. This function is only called when * roce_gid_table is used. */ int (*add_gid)(const struct ib_gid_attr *attr, void **context); /** * When calling del_gid, the HW vendor's driver should delete the * GID entry described by @attr (the device, port number and GID index * are all available in @attr). * Upon the deletion of a GID entry, the HW vendor must free any * allocated memory. The caller will clear @context afterwards. * This function is only called when roce_gid_table is used. */ int (*del_gid)(const struct ib_gid_attr *attr, void **context); int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index, u16 *pkey); int (*alloc_ucontext)(struct ib_ucontext *context, struct ib_udata *udata); void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); /** * This will be called once refcount of an entry in mmap_xa reaches * zero. The type of the memory that was mapped may differ between * entries and is opaque to the rdma_user_mmap interface. * Therefore needs to be implemented by the driver in mmap_free. 
*/ void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); /* struct ib_pd callbacks */ int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); /* struct ib_ah callbacks */ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); /* struct ib_srq callbacks */ int (*create_srq)(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); /* struct ib_qp callbacks */ int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); /* struct ib_cq callbacks */ int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); /* Callbacks that return a struct ib_mr for a given struct ib_pd */ struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, u64 length, u64 virt_addr, int fd, int mr_access_flags, struct ib_udata *udata); 
struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd, u32 max_num_data_sg, u32 max_num_meta_sg); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); /* * Kernel users should universally support relaxed ordering (RO), as * they are designed to read data only after observing the CQE and use * the DMA API correctly. * * Some drivers implicitly enable RO if platform supports it. */ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); /* struct ib_mw callbacks */ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); /* Multicast attach/detach for a struct ib_qp */ int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); /* struct ib_xrcd callbacks */ int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); /* struct ib_flow callbacks */ struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); /* struct ib_flow_action callbacks */ struct ib_flow_action *(*create_flow_action_esp)( struct ib_device *device, const struct ib_flow_action_attrs_esp *attr, struct uverbs_attr_bundle *attrs); int (*destroy_flow_action)(struct ib_flow_action *action); int (*modify_flow_action_esp)( struct ib_flow_action *action, const struct ib_flow_action_attrs_esp *attr, struct uverbs_attr_bundle *attrs); /* Per-VF callbacks, addressed by (device, vf, port) */ int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); int (*get_vf_config)(struct ib_device *device, int vf, u32 port, 
struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); int (*get_vf_guid)(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid, int type); /* struct ib_wq callbacks */ struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); /* struct ib_rwq_ind_table callbacks */ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); /* struct ib_dm callbacks */ struct ib_dm *(*alloc_dm)(struct ib_device *device, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); /* struct ib_counters callbacks */ int (*create_counters)(struct ib_counters *counters, struct uverbs_attr_bundle *attrs); int (*destroy_counters)(struct ib_counters *counters); int (*read_counters)(struct ib_counters *counters, struct ib_counters_read_attr *counters_read_attr, struct uverbs_attr_bundle *attrs); int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg, int data_sg_nents, unsigned int *data_sg_offset, struct scatterlist *meta_sg, int meta_sg_nents, unsigned int *meta_sg_offset); /** * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and * fill in the driver initialized data. The struct is kfree()'ed by * the sysfs core when the device is removed. A lifespan of -1 in the * return struct tells the core to set a default lifespan. 
*/ struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device); struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device, u32 port_num); /** * get_hw_stats - Fill in the counter value(s) in the stats struct. * @index - The index in the value array we wish to have updated, or * num_counters if we want all stats updated * Return codes - * < 0 - Error, no counters updated * index - Updated the single counter pointed to by index * num_counters - Updated all counters (will reset the timestamp * and prevent further calls for lifespan milliseconds) * Drivers are allowed to update all counters in lieu of just the * one given in index at their option */ int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u32 port, int index); /** * Allows rdma drivers to add their own restrack attributes. */ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr); int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq); int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq); int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); /* Device lifecycle callbacks */ /* * Called after the device becomes registered, before clients are * attached */ int (*enable_driver)(struct ib_device *dev); /* * This is called as part of ib_dealloc_device(). 
*/ void (*dealloc_driver)(struct ib_device *dev); /* iWarp CM callbacks */ void (*iw_add_ref)(struct ib_qp *qp); void (*iw_rem_ref)(struct ib_qp *qp); struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn); int (*iw_connect)(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int (*iw_accept)(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); int (*iw_destroy_listen)(struct iw_cm_id *cm_id); /** * counter_bind_qp - Bind a QP to a counter. * @counter - The counter to be bound. If counter->id is zero then * the driver needs to allocate a new counter and set counter->id */ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp); /** * counter_unbind_qp - Unbind the qp from the dynamically-allocated * counter and bind it onto the default one */ int (*counter_unbind_qp)(struct ib_qp *qp); /** * counter_dealloc - De-allocate the hw counter */ int (*counter_dealloc)(struct rdma_counter *counter); /** * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in * the driver initialized data. */ struct rdma_hw_stats *(*counter_alloc_stats)( struct rdma_counter *counter); /** * counter_update_stats - Query the stats value of this counter */ int (*counter_update_stats)(struct rdma_counter *counter); /** * Allows rdma drivers to add their own restrack attributes * dumped via 'rdma stat' iproute2 command. */ int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); /* query driver for its ucontext properties */ int (*query_ucontext)(struct ib_ucontext *context, struct uverbs_attr_bundle *attrs); /* * Provide NUMA node. This API exists for rdmavt/hfi1 only. * Everyone else relies on Linux memory management model. 
*/ int (*get_numa_node)(struct ib_device *dev); DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_mw); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_qp); DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); DECLARE_RDMA_OBJ_SIZE(ib_xrcd); }; struct ib_core_device { /* dev must remain the first member for as long as struct ib_device * overlays struct device and struct ib_core_device in a union. */ struct device dev; possible_net_t rdma_net; struct kobject *ports_kobj; struct list_head port_list; struct ib_device *owner; /* reach back to owner ib_device */ }; struct rdma_restrack_root; struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; struct ib_device_ops ops; char name[IB_DEVICE_NAME_MAX]; struct rcu_head rcu_head; struct list_head event_handler_list; /* Protects event_handler_list */ struct rw_semaphore event_handler_rwsem; /* Protects QP's event_handler calls and open_qp list */ spinlock_t qp_open_list_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; struct mutex unregistration_lock; /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */ rwlock_t cache_lock; /** * port_data is indexed by port number */ struct ib_port_data *port_data; int num_comp_vectors; union { struct device dev; struct ib_core_device coredev; }; /* First group is for device attributes, * Second group is for driver provided attributes (optional). * Third group is for the hw_stats * It is a NULL terminated array. 
*/ const struct attribute_group *groups[4]; u8 hw_stats_attr_index; u64 uverbs_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; /* Indicates kernel verbs support, should not be used in drivers */ u16 kverbs_provider:1; /* CQ adaptive moderation (RDMA DIM) */ u16 use_cq_dim:1; u8 node_type; u32 phys_port_cnt; struct ib_device_attr attrs; struct hw_stats_device_data *hw_stats_data; #ifdef CONFIG_CGROUP_RDMA struct rdmacg_device cg_device; #endif u32 index; spinlock_t cq_pools_lock; struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1]; struct rdma_restrack_root *res; const struct uapi_definition *driver_def; /* * Positive refcount indicates that the device is currently * registered and cannot be unregistered. */ refcount_t refcount; struct completion unreg_completion; struct work_struct unregistration_work; const struct rdma_link_ops *link_ops; /* Protects compat_devs xarray modifications */ struct mutex compat_devs_mutex; /* Maintains compat devices for each net namespace */ struct xarray compat_devs; /* Used by iWarp CM */ char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; u32 lag_flags; }; /* Zero-allocate @size bytes with @gfp. When @is_numa_aware is set and the * driver provides ops.get_numa_node, the allocation goes through * kzalloc_node() on the node that callback returns; otherwise plain * kzalloc() is used. */ static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, gfp_t gfp, bool is_numa_aware) { if (is_numa_aware && dev->ops.get_numa_node) return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev)); return kzalloc(size, gfp); } struct ib_client_nl_info; struct ib_client { const char *name; int (*add)(struct ib_device *ibdev); void (*remove)(struct ib_device *, void *client_data); void (*rename)(struct ib_device *dev, void *client_data); int (*get_nl_info)(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res); int (*get_global_nl_info)(struct ib_client_nl_info *res); /* Returns the net_dev belonging to this ib_client and matching the * given parameters. * @dev: An RDMA device that the net_dev use for communication. * @port: A physical port number on the RDMA device. 
* @pkey: P_Key that the net_dev uses if applicable. * @gid: A GID that the net_dev uses to communicate. * @addr: An IP address the net_dev is configured with. * @client_data: The device's client data set by ib_set_client_data(). * * An ib_client that implements a net_dev on top of RDMA devices * (such as IP over IB) should implement this callback, allowing the * rdma_cm module to find the right net_dev for a given request. * * The caller is responsible for calling dev_put on the returned * netdev. */ struct net_device *(*get_net_dev_by_params)( struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); refcount_t uses; struct completion uses_zero; u32 client_id; /* kverbs are not required by the client */ u8 no_kverbs_req:1; }; /* * IB block DMA iterator * * Iterates the DMA-mapped SGL in contiguous memory blocks aligned * to a HW supported page size. */ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ }; struct ib_device *_ib_alloc_device(size_t size); /* Calls _ib_alloc_device() with sizeof(struct drv_struct) and converts the * result to the enclosing struct drv_struct via container_of(); the build * fails unless @member sits at offset 0 of struct drv_struct. */ #define ib_alloc_device(drv_struct, member) \ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ struct drv_struct, member))), \ struct drv_struct, member) void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); int ib_register_device(struct ib_device *device, const char *name, struct device *dma_device); void ib_unregister_device(struct ib_device *device); void ib_unregister_driver(enum rdma_driver_id driver_id); void ib_unregister_device_and_put(struct ib_device *device); void 
ib_unregister_device_queued(struct ib_device *ib_dev); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); void __rdma_block_iter_start(struct ib_block_iter *biter, struct scatterlist *sglist, unsigned int nents, unsigned long pgsz); bool __rdma_block_iter_next(struct ib_block_iter *biter); /** * rdma_block_iter_dma_address - get the aligned dma address of the current * block held by the block iterator. * @biter: block iterator holding the memory block */ static inline dma_addr_t rdma_block_iter_dma_address(struct ib_block_iter *biter) { return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1); } /** * rdma_for_each_block - iterate over contiguous memory blocks of the sg list * @sglist: sglist to iterate over * @biter: block iterator holding the memory block * @nents: maximum number of sg entries to iterate over * @pgsz: best HW supported page size to use * * Callers may use rdma_block_iter_dma_address() to get each * block's aligned DMA address. */ #define rdma_for_each_block(sglist, biter, nents, pgsz) \ for (__rdma_block_iter_start(biter, sglist, nents, \ pgsz); \ __rdma_block_iter_next(biter);) /** * ib_get_client_data - Get IB client context * @device:Device to get context for * @client:Client to get context for * * ib_get_client_data() returns the client context data set with * ib_set_client_data(). This can only be called while the client is * registered to the device, once the ib_client remove() callback returns this * cannot be called. 
*/ static inline void *ib_get_client_data(struct ib_device *device, struct ib_client *client) { return xa_load(&device->client_data, client->client_id); } void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); void ib_set_device_ops(struct ib_device *device, const struct ib_device_ops *ops); int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot, struct rdma_user_mmap_entry *entry); int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length); int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length, u32 min_pgoff, u32 max_pgoff); struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); struct rdma_user_mmap_entry * rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, struct vm_area_struct *vma); void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); /* Copy @len bytes from udata->inbuf into @dest. Returns 0, or -EFAULT when * copy_from_user() returns non-zero. */ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; } /* Copy @len bytes from @src to udata->outbuf. Returns 0, or -EFAULT when * copy_to_user() returns non-zero. */ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } /* Returns true when memchr_inv() finds no non-zero byte in the @len user * bytes at @p. Returns false when @len exceeds USHRT_MAX or when * memdup_user() fails; the temporary kernel copy is always freed. */ static inline bool ib_is_buffer_cleared(const void __user *p, size_t len) { bool ret; u8 *buf; if (len > USHRT_MAX) return false; buf = memdup_user(p, len); if (IS_ERR(buf)) return false; ret = !memchr_inv(buf, 0, len); kfree(buf); return ret; } /* Same check as ib_is_buffer_cleared(), applied to udata->inbuf + @offset. */ static inline bool ib_is_udata_cleared(struct ib_udata *udata, size_t offset, size_t len) { return ib_is_buffer_cleared(udata->inbuf + offset, len); } /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. 
* @cur_state: Current QP state * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It * checks that cur_state and next_state are valid QP states, that a * transition from cur_state to next_state is allowed by the IB spec, * and that the attribute mask supplied is allowed for the transition. */ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask); void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, u32 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u32 port_num); /** * rdma_cap_ib_switch - Check if the device is an IB switch * @device: Device to check * * The device driver is responsible for setting the is_switch bit * in the ib_device structure at init time. * * Return: true if the device is an IB switch. */ static inline bool rdma_cap_ib_switch(const struct ib_device *device) { return device->is_switch; } /** * rdma_start_port - Return the first valid port number for the device * specified * * @device: Device to be checked * * Return: the start port number (0 for an IB switch, 1 otherwise) */ static inline u32 rdma_start_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 
0 : 1; } /** * rdma_for_each_port - Iterate over all valid port numbers of the IB device * @device: The struct ib_device * to iterate over * @iter: The u32 variable to store the port number (the macro fails to * build if @iter is not the same type as u32) */ #define rdma_for_each_port(device, iter) \ for (iter = rdma_start_port(device + \ BUILD_BUG_ON_ZERO(!__same_type(u32, \ iter))); \ iter <= rdma_end_port(device); iter++) /** * rdma_end_port - Return the last valid port number for the device * specified * * @device: Device to be checked * * Return: the last port number (0 for an IB switch, otherwise * device->phys_port_cnt) */ static inline u32 rdma_end_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } /* * Returns non-zero if @port lies in the inclusive range * [rdma_start_port(device), rdma_end_port(device)], 0 otherwise. */ static inline int rdma_is_port_valid(const struct ib_device *device, unsigned int port) { return (port >= rdma_start_port(device) && port <= rdma_end_port(device)); } /* * The helpers below test bits of the port's immutable core_cap_flags. * @port_num indexes device->port_data directly and is not range-checked here. */ static inline bool rdma_is_grh_required(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_PORT_IB_GRH_REQUIRED; } static inline bool rdma_protocol_ib(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IB; } /* True if either the RoCE or the RoCE UDP-encap protocol bit is set. */ static inline bool rdma_protocol_roce(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } static inline bool rdma_protocol_iwarp(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; } static inline bool rdma_ib_or_roce(const struct ib_device 
*device, u32 port_num) { return rdma_protocol_ib(device, port_num) || rdma_protocol_roce(device, port_num); } static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; } static inline bool rdma_protocol_usnic(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; } /** * rdma_cap_ib_mad - Check if the port of a device supports InfiniBand * Management Datagrams. * @device: Device to check * @port_num: Port number to check * * Management Datagrams (MAD) are a required part of the InfiniBand * specification and are supported on all InfiniBand devices. A slightly * extended version is also supported on OPA interfaces. * * Return: true if the port supports sending/receiving of MAD packets. */ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_MAD; } /** * rdma_cap_opa_mad - Check if the port of device provides support for OPA * Management Datagrams. * @device: Device to check * @port_num: Port number to check * * Intel OmniPath devices extend and/or replace the InfiniBand Management * datagrams with their own versions. These OPA MADs share many but not all of * the characteristics of InfiniBand MADs. * * OPA MADs differ in the following ways: * * 1) MADs are variable size up to 2K * IBTA defined MADs remain fixed at 256 bytes * 2) OPA SMPs must carry valid PKeys * 3) OPA SMP packets are a different format * * Return: true if the port supports OPA MAD packet formats. 
*/ static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_MAD; } /** * rdma_cap_ib_smi - Check if the port of a device provides an InfiniBand * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI). * @device: Device to check * @port_num: Port number to check * * Each InfiniBand node is required to provide a Subnet Management Agent * that the subnet manager can access. Prior to the fabric being fully * configured by the subnet manager, the SMA is accessed via a well-known * interface called the Subnet Management Interface (SMI). This interface * uses directed route packets to communicate with the SM to get around the * chicken-and-egg problem of the SM needing to know what's on the fabric * in order to configure the fabric, and needing to configure the fabric in * order to send packets to the devices on the fabric. These directed * route packets do not need the fabric fully configured in order to reach * their destination. The SMI is the only method allowed to send * directed route packets on an InfiniBand fabric. * * Return: true if the port provides an SMI. */ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SMI; } /** * rdma_cap_ib_cm - Check if the port of device has the capability InfiniBand * Communication Manager. * @device: Device to check * @port_num: Port number to check * * The InfiniBand Communication Manager is one of many pre-defined General * Service Agents (GSA) that are accessed via the General Service * Interface (GSI). Its role is to facilitate establishment of connections * between nodes as well as other management related tasks for established * connections. * * Return: true if the port supports an IB CM (this does not guarantee that * a CM is actually running however). 
*/ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_CM; } /** * rdma_cap_iw_cm - Check if the port of device has the capability IWARP * Communication Manager. * @device: Device to check * @port_num: Port number to check * * Similar to rdma_cap_ib_cm(), but specific to iWARP connections which have a * different management protocol than InfiniBand. * * Return: true if the port supports an iWARP CM (this does not guarantee that * a CM is actually running however). */ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IW_CM; } /** * rdma_cap_ib_sa - Check if the port of device has the capability InfiniBand * Subnet Administration. * @device: Device to check * @port_num: Port number to check * * An InfiniBand Subnet Administration (SA) service is a pre-defined General * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand * fabrics, devices should resolve routes to other hosts by contacting the * SA to query the proper route. * * Return: true if the port should act as a client to the fabric Subnet * Administration interface. This does not imply that the SA service is * running locally. */ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SA; } /** * rdma_cap_ib_mcast - Check if the port of device has the capability InfiniBand * Multicast. * @device: Device to check * @port_num: Port number to check * * InfiniBand multicast registration is more complex than normal IPv4 or * IPv6 multicast registration. Each Host Channel Adapter must register * with the Subnet Manager when it wishes to join a multicast group. It * should do so only once regardless of how many queue pairs it subscribes * to this group. 
And it should leave the group only after all queue pairs * attached to the group have been detached. * * This check is implemented as rdma_cap_ib_sa(). * * Return: true if the port must undertake the additional administrative * overhead of registering/unregistering with the SM and tracking of the * total number of queue pairs attached to the multicast group. */ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u32 port_num) { return rdma_cap_ib_sa(device, port_num); } /** * rdma_cap_af_ib - Check if the port of device has the capability * Native InfiniBand Address. * @device: Device to check * @port_num: Port number to check * * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default * GID. RoCE uses a different mechanism, but still generates a GID via * a prescribed mechanism and port specific data. * * Return: true if the port uses a GID address to identify devices on the * network. */ static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_AF_IB; } /** * rdma_cap_eth_ah - Check if the port of device has the capability * Ethernet Address Handle. * @device: Device to check * @port_num: Port number to check * * RoCE is InfiniBand over Ethernet, and it uses a well defined technique * to fabricate GIDs over Ethernet/IP specific addresses native to the * port. Normally, packet headers are generated by the sending host * adapter, but when sending connectionless datagrams, we must manually * inject the proper headers for the fabric we are communicating over. * * Return: true if we are running as a RoCE port and must force the * addition of a Global Route Header built from our Ethernet Address * Handle into our header list for connectionless packets. 
*/ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_ETH_AH; } /** * rdma_cap_opa_ah - Check if the port of device supports * OPA Address handles * @device: Device to check * @port_num: Port number to check * * Unlike the single-bit capability checks, this compares the masked flags * against RDMA_CORE_CAP_OPA_AH, so every bit of that mask must be set. * * Return: true if we are running on an OPA device which supports * the extended OPA addressing. */ static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num) { return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; } /** * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. * * @device: Device * @port_num: Port number * * This MAD size includes the MAD headers and MAD payload. No other headers * are included. * * Return: the max MAD size required by the port. Will return 0 if the port * does not support MADs. */ static inline size_t rdma_max_mad_size(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.max_mad_size; } /** * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table * @device: Device to check * @port_num: Port number to check * * RoCE GID table mechanism manages the various GIDs for a device. * * NOTE: if allocating the port's GID table has failed, this call will still * return true, but any RoCE GID table API will fail. * * Return: true if the port uses RoCE GID table mechanism in order to manage * its GIDs, i.e. the port is a RoCE port and the device provides both the * add_gid and del_gid ops. */ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, u32 port_num) { return rdma_protocol_roce(device, port_num) && device->ops.add_gid && device->ops.del_gid; } /* * Check if the device supports READ W/ INVALIDATE. */ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) { /* * iWarp drivers must support READ W/ INVALIDATE. No other protocol * has support for it yet. 
*/ return rdma_protocol_iwarp(dev, port_num); } /** * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. * @device: Device * @port_num: 1 based Port number * * Return: true if port is an Intel OPA port, false if not */ static inline bool rdma_core_cap_opa_port(struct ib_device *device, u32 port_num) { return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA; } /** * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value. * @device: Device * @port: Port number * @mtu: enum value of MTU, interpreted as enum opa_mtu on an OPA port and * as enum ib_mtu otherwise * * Return the MTU size supported by the port as an integer value. Will return * -1 if enum value of mtu is not supported. */ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port, int mtu) { if (rdma_core_cap_opa_port(device, port)) return opa_mtu_enum_to_int((enum opa_mtu)mtu); else return ib_mtu_enum_to_int((enum ib_mtu)mtu); } /** * rdma_mtu_from_attr - Return the mtu of the port from the port attribute. * @device: Device * @port: Port number * @attr: port attribute * * On an OPA port this is attr->phys_mtu; otherwise attr->max_mtu is converted * with ib_mtu_enum_to_int(). * * Return the MTU size supported by the port as an integer value. 
*/ static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port, struct ib_port_attr *attr) { if (rdma_core_cap_opa_port(device, port)) return attr->phys_mtu; else return ib_mtu_enum_to_int(attr->max_mtu); } int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type); int ib_query_pkey(struct ib_device *device, u32 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, u32 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u32 port_num, u16 pkey, u16 *index); enum ib_pd_flags { /* * Create a memory registration for all memory in the system and place * the rkey for it into pd->unsafe_global_rkey. This can be used by * ULPs to avoid the overhead of dynamic MRs. * * This flag is generally considered unsafe and must only be used in * extremely trusted environments. Every use of it will log a warning * in the kernel log. */ IB_PD_UNSAFE_GLOBAL_RKEY = 0x01, }; struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * @flags: protection domain flags * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. 
* * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); /** * ib_dealloc_pd - Deallocate kernel PD * @pd: The protection domain * * Calls ib_dealloc_pd_user() with NULL udata; a non-zero result is not * returned to the caller but triggers a one-time warning. * * NOTE: for user PD use ib_dealloc_pd_user with valid udata! */ static inline void ib_dealloc_pd(struct ib_pd *pd) { int ret = ib_dealloc_pd_user(pd, NULL); WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail"); } enum rdma_create_ah_flags { /* In a sleepable context */ RDMA_CREATE_AH_SLEEPABLE = BIT(0), }; /** * rdma_create_ah - Creates an address handle for the given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * @flags: Create address handle flags (see enum rdma_create_ah_flags). * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags); /** * rdma_create_user_ah - Creates an address handle for the given address vector. * It resolves destination mac address for ah attribute of RoCE type. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * @udata: pointer to user's input output buffer information needed by * provider driver. * * NOTE(review): the prototype returns a struct ib_ah pointer rather than an * int status, so failures are presumably reported through the returned * pointer - confirm the error convention against the implementation. * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); /** * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header * work completion. 
* @hdr: the L3 header to parse * @net_type: type of header to parse * @sgid: place to store source gid * @dgid: place to store destination gid */ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, enum rdma_network_type net_type, union ib_gid *sgid, union ib_gid *dgid); /** * ib_get_rdma_header_version - Get the header version * @hdr: the L3 header to parse */ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); /** * ib_init_ah_attr_from_wc - Initializes address handle attributes from a * work completion. * @device: Device on which the received message arrived. * @port_num: Port on which the received message arrived. * @wc: Work completion associated with the received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. * When ib_init_ah_attr_from_wc() returns success, * (a) for IB link layer it optionally contains a reference to the SGID * attribute when GRH is present. * (b) for RoCE link layer it contains a reference to the SGID attribute. * The caller must invoke rdma_cleanup_ah_attr_gid_attr() to release the * reference to SGID attributes which are initialized using * ib_init_ah_attr_from_wc(). * */ int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the * sender of the specified work completion. * @pd: The protection domain associated with the address handle. * @wc: Work completion information associated with a received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @port_num: The outbound port number to associate with the address. 
* * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u32 port_num); /** * rdma_modify_ah - Modifies the address vector associated with an address * handle. * @ah: The address handle to modify. * @ah_attr: The new address vector attributes to associate with the * address handle. */ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); /** * rdma_query_ah - Queries the address vector associated with an address * handle. * @ah: The address handle to query. * @ah_attr: The address vector attributes associated with the address * handle. */ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); enum rdma_destroy_ah_flags { /* In a sleepable context */ RDMA_DESTROY_AH_SLEEPABLE = BIT(0), }; /** * rdma_destroy_ah_user - Destroys an address handle. * @ah: The address handle to destroy. * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). * @udata: Valid user data or NULL for kernel objects */ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata); /** * rdma_destroy_ah - Destroys a kernel address handle. * @ah: The address handle to destroy. * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). * * Calls rdma_destroy_ah_user() with NULL udata; a non-zero result is not * returned to the caller but triggers a one-time warning. * * NOTE: for user ah use rdma_destroy_ah_user with valid udata! 
*/ static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags) { int ret = rdma_destroy_ah_user(ah, flags, NULL); WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail"); } struct ib_srq *ib_create_srq_user(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_usrq_object *uobject, struct ib_udata *udata); /** * ib_create_srq - Creates a kernel SRQ. * @pd: The protection domain, passed through to ib_create_srq_user(). * @srq_init_attr: Initial SRQ attributes, passed through to * ib_create_srq_user(). * * Return: ERR_PTR(-EOPNOTSUPP) if the device has no create_srq op, otherwise * the result of ib_create_srq_user() called with NULL uobject and NULL udata. */ static inline struct ib_srq * ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) { if (!pd->device->ops.create_srq) return ERR_PTR(-EOPNOTSUPP); return ib_create_srq_user(pd, srq_init_attr, NULL, NULL); } /** * ib_modify_srq - Modifies the attributes for the specified SRQ. * @srq: The SRQ to modify. * @srq_attr: On input, specifies the SRQ attributes to modify. On output, * the current values of selected SRQ attributes are returned. * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ * are being modified. * * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or * IB_SRQ_LIMIT to set the SRQ's limit and request notification when * the number of receives queued drops below the limit. */ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask); /** * ib_query_srq - Returns the attribute list and current values for the * specified SRQ. * @srq: The SRQ to query. * @srq_attr: The attributes of the specified SRQ. */ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** * ib_destroy_srq_user - Destroys the specified SRQ. * @srq: The SRQ to destroy. * @udata: Valid user data or NULL for kernel objects */ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata); /** * ib_destroy_srq - Destroys the specified kernel SRQ. * @srq: The SRQ to destroy. * * NOTE: for user srq use ib_destroy_srq_user with valid udata! 
*/ static inline void ib_destroy_srq(struct ib_srq *srq) { int ret = ib_destroy_srq_user(srq, NULL); WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail"); } /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. * @srq: The SRQ to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the SRQ. May be NULL, in * which case the address of a local dummy pointer is passed to the driver. */ static inline int ib_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr) { const struct ib_recv_wr *dummy; return srq->device->ops.post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy); } struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, const char *caller); /** * ib_create_qp - Creates a kernel QP associated with the specific protection * domain. * @pd: The protection domain associated with the QP. * @init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. * * Calls ib_create_qp_kernel() with KBUILD_MODNAME as the caller name. */ static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr) { return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME); } /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. * @qp: The QP to modify. * @attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @attr_mask: A bit-mask used to specify which attributes of the QP * are being modified. * @udata: pointer to user's input output buffer information * * It returns 0 on success and returns appropriate error code on error. 
*/ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); /** * ib_modify_qp - Modifies the attributes for the specified QP and then * transitions the QP to the given state. * @qp: The QP to modify. * @qp_attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @qp_attr_mask: A bit-mask used to specify which attributes of the QP * are being modified. */ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask); /** * ib_query_qp - Returns the attribute list and current values for the * specified QP. * @qp: The QP to query. * @qp_attr: The attributes of the specified QP. * @qp_attr_mask: A bit-mask used to select specific attributes to query. * @qp_init_attr: Additional attributes of the selected QP. * * The qp_attr_mask may be used to limit the query to gathering only the * selected attributes. */ int ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); /** * ib_destroy_qp_user - Destroys the specified QP. * @qp: The QP to destroy. * @udata: Valid udata or NULL for kernel objects */ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata); /** * ib_destroy_qp - Destroys the specified kernel QP. * @qp: The QP to destroy. * * Return: the result of ib_destroy_qp_user() called with NULL udata. * * NOTE: for user qp use ib_destroy_qp_user with valid udata! */ static inline int ib_destroy_qp(struct ib_qp *qp) { return ib_destroy_qp_user(qp, NULL); } /** * ib_open_qp - Obtain a reference to an existing sharable QP. * @xrcd: XRC domain * @qp_open_attr: Attributes identifying the QP to open. * * Returns a reference to a sharable QP. */ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, struct ib_qp_open_attr *qp_open_attr); /** * ib_close_qp - Release an external reference to a QP. * @qp: The QP handle to release * * The opened QP handle is released by the caller. 
The underlying * shared QP is not destroyed until all internal references are released. */ int ib_close_qp(struct ib_qp *qp); /** * ib_post_send - Posts a list of work requests to the send queue of * the specified QP. * @qp: The QP to post the work request on. * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. May be NULL, in * which case the address of a local dummy pointer is passed to the driver. * * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate * error is returned, the QP state shall not be affected, * ib_post_send() will return an immediate error after queueing any * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr) { const struct ib_send_wr *dummy; return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy); } /** * ib_post_recv - Posts a list of work requests to the receive queue of * the specified QP. * @qp: The QP to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. May be NULL, in * which case the address of a local dummy pointer is passed to the driver. */ static inline int ib_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr) { const struct ib_recv_wr *dummy; return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? 
: &dummy); } struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx, const char *caller); /** * ib_alloc_cq - Allocate kernel CQ * @dev: The IB device * @private: Private data passed through to __ib_alloc_cq() * @nr_cqe: Number of CQEs in the CQ * @comp_vector: Completion vector passed through to __ib_alloc_cq() * @poll_ctx: Context used for polling the CQ * * Calls __ib_alloc_cq() with KBUILD_MODNAME as the caller name. */ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) { return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx, KBUILD_MODNAME); } struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, int nr_cqe, enum ib_poll_context poll_ctx, const char *caller); /** * ib_alloc_cq_any - Allocate kernel CQ * @dev: The IB device * @private: Private data attached to the CQE * @nr_cqe: Number of CQEs in the CQ * @poll_ctx: Context used for polling the CQ * * Calls __ib_alloc_cq_any() with KBUILD_MODNAME as the caller name. */ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, void *private, int nr_cqe, enum ib_poll_context poll_ctx) { return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, KBUILD_MODNAME); } void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. * @comp_handler: A user-specified callback that is invoked when a * completion event occurs on the CQ. * @event_handler: A user-specified callback that is invoked when an * asynchronous event not associated with a completion occurs on the CQ. * @cq_context: Context associated with the CQ returned to the user via * the associated completion and event handlers. * @cq_attr: The attributes the CQ should be created upon. * * Users can examine the cq structure to determine the actual CQ size. 
*/ struct ib_cq *__ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, const struct ib_cq_init_attr *cq_attr, const char *caller); #define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \ __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME) /** * ib_resize_cq - Modifies the capacity of the CQ. * @cq: The CQ to resize. * @cqe: The minimum size of the CQ. * * Users can examine the cq structure to determine the actual CQ size. */ int ib_resize_cq(struct ib_cq *cq, int cqe); /** * rdma_set_cq_moderation - Modifies moderation params of the CQ * @cq: The CQ to modify. * @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq_user - Destroys the specified CQ. * @cq: The CQ to destroy. * @udata: Valid user data or NULL for kernel objects */ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); /** * ib_destroy_cq - Destroys the specified kernel CQ. * @cq: The CQ to destroy. * * Calls ib_destroy_cq_user() with NULL udata; a non-zero result is not * returned to the caller but triggers a one-time warning. * * NOTE: for user cq use ib_destroy_cq_user with valid udata! */ static inline void ib_destroy_cq(struct ib_cq *cq) { int ret = ib_destroy_cq_user(cq, NULL); WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail"); } /** * ib_poll_cq - poll a CQ for completion(s) * @cq: the CQ being polled * @num_entries: maximum number of completions to return * @wc: array of at least @num_entries &struct ib_wc where completions * will be returned * * Poll a CQ for (possibly multiple) completions. If the return value * is < 0, an error occurred. If the return value is >= 0, it is the * number of completions returned. If the return value is * non-negative and < num_entries, then the CQ was emptied. 
*/ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { return cq->device->ops.poll_cq(cq, num_entries, wc); } /** * ib_req_notify_cq - Request completion notification on a CQ. * @cq: The CQ to generate an event for. * @flags: * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP * to request an event on the next solicited event or next work * completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS * may also be OR'ed in to request a hint about missed events, as * described below. * * Return Value: * < 0 means an error occurred while requesting notification * == 0 means notification was requested successfully, and if * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events * were missed and it is safe to wait for another event. In * this case it is guaranteed that any work completions added * to the CQ since the last CQ poll will trigger a completion * notification event. * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed * in. It means that the consumer must poll the CQ again to * make sure it is empty to avoid missing an event because of a * race between requesting notification and an entry being * added to the CQ. This return value means it is possible * (but not guaranteed) that a work completion has been added * to the CQ since the last poll without triggering a * completion notification event. */ static inline int ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { return cq->device->ops.req_notify_cq(cq, flags); } struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, int comp_vector_hint, enum ib_poll_context poll_ctx); void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); /* * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to * NULL. This causes the ib_dma* helpers to just stash the kernel virtual * address into the dma address. 
*/ static inline bool ib_uses_virt_dma(struct ib_device *dev) { return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; } /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created * @dma_addr: The DMA address to check * * Return: always 0 for devices that use virtual DMA; otherwise the result of * dma_mapping_error() on the underlying dma_device. */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (ib_uses_virt_dma(dev)) return 0; return dma_mapping_error(dev->dma_device, dma_addr); } /** * ib_dma_map_single - Map a kernel virtual address to DMA address * @dev: The device for which the dma_addr is to be created * @cpu_addr: The kernel virtual address * @size: The size of the region in bytes * @direction: The direction of the DMA * * Return: for devices that use virtual DMA, @cpu_addr itself converted to an * integer; otherwise the address produced by dma_map_single(). */ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { if (ib_uses_virt_dma(dev)) return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA * * Does nothing for devices that use virtual DMA. */ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (!ib_uses_virt_dma(dev)) dma_unmap_single(dev->dma_device, addr, size, direction); } /** * ib_dma_map_page - Map a physical page to DMA address * @dev: The device for which the dma_addr is to be created * @page: The page to be mapped * @offset: The offset within the page * @size: The size of the region in bytes * @direction: The direction of the DMA * * Return: for devices that use virtual DMA, page_address(@page) + @offset * converted to an integer; otherwise the address produced by dma_map_page(). */ static inline u64 ib_dma_map_page(struct ib_device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { if (ib_uses_virt_dma(dev)) return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } /** * ib_dma_unmap_page - 
Destroy a mapping created by ib_dma_map_page() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (!ib_uses_virt_dma(dev)) dma_unmap_page(dev->dma_device, addr, size, direction); } int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); /* Map a scatterlist with DMA attributes: virtual-DMA devices go through ib_dma_virt_map_sg(), all others through dma_map_sg_attrs(). */ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, unsigned long dma_attrs) { if (ib_uses_virt_dma(dev)) return ib_dma_virt_map_sg(dev, sg, nents); return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /* Undo ib_dma_map_sg_attrs(); does nothing for devices that use virtual DMA. */ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, unsigned long dma_attrs) { if (!ib_uses_virt_dma(dev)) dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses * @dev: The device for which the DMA addresses are to be created * @sgt: The sg_table object describing the buffer * @direction: The direction of the DMA * @dma_attrs: Optional DMA attributes for the map operation * * For devices that use virtual DMA the table is mapped with * ib_dma_virt_map_sg() over sgt->orig_nents entries and sgt->nents is set to * the number of mapped entries. * * Return: 0 on success. In the virtual DMA case -EIO is returned when no * entries could be mapped; otherwise the result of dma_map_sgtable(). */ static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, struct sg_table *sgt, enum dma_data_direction direction, unsigned long dma_attrs) { int nents; if (ib_uses_virt_dma(dev)) { nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); if (!nents) return -EIO; sgt->nents = nents; return 0; } return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); } /* Undo ib_dma_map_sgtable_attrs(); does nothing for devices that use virtual DMA. */ static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev, struct sg_table *sgt, enum dma_data_direction direction, unsigned long dma_attrs) { if (!ib_uses_virt_dma(dev)) dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs); } /** * ib_dma_map_sg - 
Map a scatter/gather list to DMA addresses * @dev: The device for which the DMA addresses are to be created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA * * Equivalent to ib_dma_map_sg_attrs() with no DMA attributes (dma_attrs == 0). */ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses * @dev: The device for which the DMA addresses were created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA * * Equivalent to ib_dma_unmap_sg_attrs() with no DMA attributes (dma_attrs == 0). */ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** * ib_dma_max_seg_size - Return the size limit of a single DMA transfer * @dev: The device to query * * The returned value represents a size in bytes. 
*/ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { /* Virtual-DMA devices have no underlying dma_device to query, so report UINT_MAX. */ if (ib_uses_virt_dma(dev)) return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } /** * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA * * Does nothing for devices that use virtual DMA. */ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (!ib_uses_virt_dma(dev)) dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA * * Does nothing for devices that use virtual DMA. */ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (!ib_uses_virt_dma(dev)) dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel * space. This function should be called when 'current' is the owning MM. */ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags); /* ib_advise_mr - give advice about an address range in a memory region */ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); /** * ib_dereg_mr_user - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. * @udata: Valid user data or NULL for kernel object * * This function can fail, if the memory region has memory windows bound to it. */ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata); /** * ib_dereg_mr - Deregisters a kernel memory region and removes it from the * HCA translation table. 
* @mr: The memory region to deregister. * * This function can fail, if the memory region has memory windows bound to it. * * NOTE: for user mr use ib_dereg_mr_user with valid udata! */ static inline int ib_dereg_mr(struct ib_mr *mr) { return ib_dereg_mr_user(mr, NULL); } struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_data_sg, u32 max_num_meta_sg); /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. * @mr: struct ib_mr pointer to be updated. * @newkey: new key to be used. * * Only the low 8 bits of mr->lkey and mr->rkey are replaced with @newkey; the * upper 24 bits of both keys are preserved. */ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) { mr->lkey = (mr->lkey & 0xffffff00) | newkey; mr->rkey = (mr->rkey & 0xffffff00) | newkey; } /** * ib_inc_rkey - increments the key portion of the given rkey. Can be used * for calculating a new rkey for type 2 memory windows. * @rkey: the rkey to increment. * * Return: @rkey with its low 8 bits incremented modulo 256; the upper 24 bits * are returned unchanged (no carry out of the key byte). */ static inline u32 ib_inc_rkey(u32 rkey) { const u32 mask = 0x000000ff; return ((rkey + 1) & mask) | (rkey & ~mask); } /** * ib_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be type * IB_QPT_UD. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. * * In order to send and receive multicast packets, subnet * administration must have created the multicast group and configured * the fabric appropriately. The port associated with the specified * QP must also be a member of the multicast group. */ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_detach_mcast - Detaches the specified QP from a multicast group. * @qp: QP to detach from the multicast group. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. 
*/ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); /** * ib_check_mr_access - validate a set of MR access flags for a device * @ib_dev: The device the memory region would be registered on * @flags: The requested IB_ACCESS_* flags * * Return: 0 if the flags are acceptable, or -EINVAL when remote write/atomic * is requested without local write, when a flag outside IB_ACCESS_SUPPORTED * is set, or when IB_ACCESS_ON_DEMAND is requested on a device that does not * advertise IB_DEVICE_ON_DEMAND_PAGING. */ static inline int ib_check_mr_access(struct ib_device *ib_dev, unsigned int flags) { /* * Local write permission is required if remote write or * remote atomic permission is also requested. */ if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; if (flags & IB_ACCESS_ON_DEMAND && !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) return -EINVAL; return 0; } /* ib_access_writable - true if any access flag that implies writable backing memory is set. */ static inline bool ib_access_writable(int access_flags) { /* * We have writable memory backing the MR if any of the following * access flags are set. "Local write" and "remote write" obviously * require write access. "Remote atomic" can do things like fetch and * add, which will modify memory, and "MW bind" can change permissions * by binding a window. */ return access_flags & (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND); } /** * ib_check_mr_status - lightweight check of MR status. * This routine may provide status checks on a selected * ib_mr. first use is for signature status check. * * @mr: A memory region. * @check_mask: Bitmask of which checks to perform from * ib_mr_status_check enumeration. * @mr_status: The container of relevant status checks. * failed checks will be indicated in the status bitmask * and the relevant info shall be in the error item. */ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); /** * ib_device_try_get - Hold a registration lock * @dev: The device to lock * * A device under an active registration lock cannot become unregistered. 
It * is only possible to obtain a registration lock on a device that is fully * registered, otherwise this function returns false. * * The registration lock is only necessary for actions which require the * device to still be registered. Uses that only require the device pointer to * be valid should use get_device(&ibdev->dev) to hold the memory. * */ static inline bool ib_device_try_get(struct ib_device *dev) { return refcount_inc_not_zero(&dev->refcount); } void ib_device_put(struct ib_device *device); struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, enum rdma_driver_id driver_id); struct ib_device *ib_device_get_by_name(const char *name, enum rdma_driver_id driver_id); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); struct net_device *ib_device_netdev(struct ib_device *dev, u32 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, int data_sg_nents, unsigned int *data_sg_offset, struct scatterlist *meta_sg, int meta_sg_nents, unsigned int *meta_sg_offset, unsigned int page_size); /* * ib_map_mr_sg_zbva - ib_map_mr_sg() variant that forces a zero iova. * Calls ib_map_mr_sg() with the same arguments, then unconditionally sets * mr->iova to 0 and returns whatever ib_map_mr_sg() returned. */ static inline int ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { int n; n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size); mr->iova = 0; return n; } int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64)); void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); int 
ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width); /* Return the destination MAC of a RoCE address handle attribute, or NULL for any other attribute type. */ static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_ROCE) return attr->roce.dmac; return NULL; } /* Store the destination LID. IB attributes keep only the low 16 bits (explicit u16 cast), OPA attributes keep the full value; other types are left untouched. */ static inline void rdma_ah_set_dlid(struct rdma_ah_attr *attr, u32 dlid) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) attr->ib.dlid = (u16)dlid; else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) attr->opa.dlid = dlid; } /* Return the destination LID of an IB or OPA attribute, or 0 for any other attribute type. */ static inline u32 rdma_ah_get_dlid(const struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) return attr->ib.dlid; else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) return attr->opa.dlid; return 0; } /* Set the service level (attr->sl). */ static inline void rdma_ah_set_sl(struct rdma_ah_attr *attr, u8 sl) { attr->sl = sl; } /* Return the service level (attr->sl). */ static inline u8 rdma_ah_get_sl(const struct rdma_ah_attr *attr) { return attr->sl; } /* Set the source path bits of an IB or OPA attribute; other attribute types are left untouched. */ static inline void rdma_ah_set_path_bits(struct rdma_ah_attr *attr, u8 src_path_bits) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) attr->ib.src_path_bits = src_path_bits; else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) attr->opa.src_path_bits = src_path_bits; } /* Return the source path bits of an IB or OPA attribute, or 0 for any other attribute type. */ static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) return attr->ib.src_path_bits; else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) return attr->opa.src_path_bits; return 0; } /* Set the OPA-only make_grd flag; ignored for non-OPA attributes. */ static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr, bool make_grd) { if (attr->type == RDMA_AH_ATTR_TYPE_OPA) attr->opa.make_grd = make_grd; } /* Return the OPA-only make_grd flag, or false for non-OPA attributes. */ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_OPA) return attr->opa.make_grd; return false; } /* Set the port number (attr->port_num). */ static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num) { attr->port_num = port_num; } /* Return the port number (attr->port_num). */ static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) { return attr->port_num; } /* Set the static rate (attr->static_rate). */ static inline void rdma_ah_set_static_rate(struct rdma_ah_attr *attr, u8 static_rate) { attr->static_rate = static_rate; } static inline u8 
rdma_ah_get_static_rate(const struct rdma_ah_attr *attr) { return attr->static_rate; } /* Overwrite attr->ah_flags with @flag (the previous value is replaced, not OR-ed). */ static inline void rdma_ah_set_ah_flags(struct rdma_ah_attr *attr, enum ib_ah_flags flag) { attr->ah_flags = flag; } /* Return attr->ah_flags. */ static inline enum ib_ah_flags rdma_ah_get_ah_flags(const struct rdma_ah_attr *attr) { return attr->ah_flags; } /* Read-only access to the global route header embedded in @attr. */ static inline const struct ib_global_route *rdma_ah_read_grh(const struct rdma_ah_attr *attr) { return &attr->grh; } /* To retrieve and modify the grh */ static inline struct ib_global_route *rdma_ah_retrieve_grh(struct rdma_ah_attr *attr) { return &attr->grh; } /* Copy sizeof(grh->dgid) bytes from @dgid into the destination GID of @attr's grh. */ static inline void rdma_ah_set_dgid_raw(struct rdma_ah_attr *attr, void *dgid) { struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); memcpy(grh->dgid.raw, dgid, sizeof(grh->dgid)); } /* Set the subnet prefix of the destination GID (big-endian value, stored as-is). */ static inline void rdma_ah_set_subnet_prefix(struct rdma_ah_attr *attr, __be64 prefix) { struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); grh->dgid.global.subnet_prefix = prefix; } /* Set the interface id of the destination GID (big-endian value, stored as-is). */ static inline void rdma_ah_set_interface_id(struct rdma_ah_attr *attr, __be64 if_id) { struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); grh->dgid.global.interface_id = if_id; } /* * rdma_ah_set_grh - fill in the global route header of @attr. * Sets attr->ah_flags to IB_AH_GRH (replacing any previous flags), copies * @dgid only when it is non-NULL (otherwise the existing dgid is kept), * stores the remaining GRH fields and resets grh->sgid_attr to NULL. */ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, union ib_gid *dgid, u32 flow_label, u8 sgid_index, u8 hop_limit, u8 traffic_class) { struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); attr->ah_flags = IB_AH_GRH; if (dgid) grh->dgid = *dgid; grh->flow_label = flow_label; grh->sgid_index = sgid_index; grh->hop_limit = hop_limit; grh->traffic_class = traffic_class; grh->sgid_attr = NULL; } void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr); void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, u32 flow_label, u8 hop_limit, u8 traffic_class, const struct ib_gid_attr *sgid_attr); void rdma_copy_ah_attr(struct rdma_ah_attr *dest, const struct rdma_ah_attr *src); void rdma_replace_ah_attr(struct rdma_ah_attr *old, const struct rdma_ah_attr *new); void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct 
rdma_ah_attr *src); /** * rdma_ah_find_type - Return address handle type. * * @dev: Device to be checked * @port_num: Port number * * RoCE ports yield RDMA_AH_ATTR_TYPE_ROCE. IB ports yield * RDMA_AH_ATTR_TYPE_OPA when rdma_cap_opa_ah() is true and * RDMA_AH_ATTR_TYPE_IB otherwise. Anything else yields * RDMA_AH_ATTR_TYPE_UNDEFINED. */ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, u32 port_num) { if (rdma_protocol_roce(dev, port_num)) return RDMA_AH_ATTR_TYPE_ROCE; if (rdma_protocol_ib(dev, port_num)) { if (rdma_cap_opa_ah(dev, port_num)) return RDMA_AH_ATTR_TYPE_OPA; return RDMA_AH_ATTR_TYPE_IB; } return RDMA_AH_ATTR_TYPE_UNDEFINED; } /** * ib_lid_cpu16 - Return lid in 16bit CPU encoding. * In the current implementation the only way to * get the 32bit lid is from other sources for OPA. * For IB, lids will always be 16bits so cast the * value accordingly. * * @lid: A 32bit LID * * Warns once if any of the upper 16 bits of @lid are set; the value is * truncated to 16 bits either way. */ static inline u16 ib_lid_cpu16(u32 lid) { WARN_ON_ONCE(lid & 0xFFFF0000); return (u16)lid; } /** * ib_lid_be16 - Return lid in 16bit BE encoding. * * @lid: A 32bit LID * * Warns once if any of the upper 16 bits of @lid are set; the low 16 bits * are converted with cpu_to_be16(). */ static inline __be16 ib_lid_be16(u32 lid) { WARN_ON_ONCE(lid & 0xFFFF0000); return cpu_to_be16((u16)lid); } /** * ib_get_vector_affinity - Get the affinity mappings of a given completion * vector * @device: the rdma device * @comp_vector: index of completion vector * * Returns NULL if @comp_vector is negative or not below * device->num_comp_vectors, or if the device driver doesn't implement * get_vector_affinity; otherwise returns the cpu map reported by the * driver's get_vector_affinity callback. */ static inline const struct cpumask * ib_get_vector_affinity(struct ib_device *device, int comp_vector) { if (comp_vector < 0 || comp_vector >= device->num_comp_vectors || !device->ops.get_vector_affinity) return NULL; return device->ops.get_vector_affinity(device, comp_vector); } /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. 
* * @ibdev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), struct net_device *netdev); /** * rdma_device_to_ibdev - Get ib_device pointer from device pointer * * @device: device pointer for which ib_device pointer to retrieve * * rdma_device_to_ibdev() retrieves ib_device pointer from device. * @device must be the dev member embedded in a struct ib_core_device; the * owner of that core device is returned. * */ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) { struct ib_core_device *coredev = container_of(device, struct ib_core_device, dev); return coredev->owner; } /** * ibdev_to_node - return the NUMA node for a given ib_device * @ibdev: device to get the NUMA node for. * * Return: NUMA_NO_NODE when the device has no parent, otherwise the node of * the parent device as reported by dev_to_node(). */ static inline int ibdev_to_node(struct ib_device *ibdev) { struct device *parent = ibdev->dev.parent; if (!parent) return NUMA_NO_NODE; return dev_to_node(parent); } /** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. * * NOTE: New drivers should not make use of this API; This API is only for * existing drivers who have exposed sysfs entries using * ops->device_group. 
*/ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); #define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) #define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF) #define IB_GRH_FLOWLABEL_MASK (0x000FFFFF) /** * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based * on the flow_label * * This function will convert the 20 bit flow_label input to a valid RoCE v2 * UDP src port 14 bit value. All RoCE V2 drivers should use this same * convention. */ static inline u16 rdma_flow_label_to_udp_sport(u32 fl) { u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000; fl_low ^= fl_high >> 14; return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN); } /** * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on * local and remote qpn values * * This function folded the multiplication results of two qpns, 24 bit each, * fields, and converts it to a 20 bit results. * * This function will create symmetric flow_label value based on the local * and remote qpn values. this will allow both the requester and responder * to calculate the same flow_label for a given connection. * * This helper function should be used by driver in case the upper layer * provide a zero flow_label value. This is to improve entropy of RDMA * traffic in the network. */ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) { u64 v = (u64)lqpn * rqpn; v ^= v >> 20; v ^= v >> 40; return (u32)(v & IB_GRH_FLOWLABEL_MASK); } const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ PK ! /�g� � rdma/rdmavt_mr.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. */ #ifndef DEF_RDMAVT_INCMR_H #define DEF_RDMAVT_INCMR_H /* * For Memory Regions. 
This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. */ #include <linux/percpu-refcount.h> /* * A segment is a linear region of low physical memory. * Used by the verbs layer. */ struct rvt_seg { void *vaddr; size_t length; }; /* The number of rvt_segs that fit in a page. */ #define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) /* One page worth of segments; rvt_mregion.map[] points at these arrays. */ struct rvt_segarray { struct rvt_seg segs[RVT_SEGSZ]; }; struct rvt_mregion { struct ib_pd *pd; /* shares refcnt of ibmr.pd */ u64 user_base; /* User's address for this region */ u64 iova; /* IB start address of this region */ size_t length; u32 lkey; u32 offset; /* offset (bytes) to start of region */ int access_flags; u32 max_segs; /* number of rvt_segs in all the arrays */ u32 mapsz; /* size of the map array */ atomic_t lkey_invalid; /* true if current lkey is invalid */ u8 page_shift; /* 0 - non-uniform/non-power-of-2 sizes */ u8 lkey_published; /* in global table */ struct percpu_ref refcount; struct completion comp; /* complete when refcount goes to zero */ struct rvt_segarray *map[]; /* the segments */ }; #define RVT_MAX_LKEY_TABLE_BITS 23 struct rvt_lkey_table { /* read mostly fields */ u32 max; /* size of the table */ u32 shift; /* lkey/rkey shift */ struct rvt_mregion __rcu **table; /* writeable fields */ /* protect changes in this struct */ spinlock_t lock ____cacheline_aligned_in_smp; u32 next; /* next unused index (speeds search) */ u32 gen; /* generation count */ }; /* * These keep track of the copy progress within a memory region. * Used by the verbs layer. 
*/ struct rvt_sge { struct rvt_mregion *mr; void *vaddr; /* kernel virtual address of segment */ u32 sge_length; /* length of the SGE */ u32 length; /* remaining length of the segment */ u16 m; /* current index: mr->map[m] */ u16 n; /* current index: mr->map[m]->segs[n] */ }; struct rvt_sge_state { struct rvt_sge *sg_list; /* next SGE to be used if any */ struct rvt_sge sge; /* progress state for the current SGE */ u32 total_len; u8 num_sge; }; static inline void rvt_put_mr(struct rvt_mregion *mr) { percpu_ref_put(&mr->refcount); } static inline void rvt_get_mr(struct rvt_mregion *mr) { percpu_ref_get(&mr->refcount); } static inline void rvt_put_ss(struct rvt_sge_state *ss) { while (ss->num_sge) { rvt_put_mr(ss->sge.mr); if (--ss->num_sge) ss->sge = *ss->sg_list++; } } static inline u32 rvt_get_sge_length(struct rvt_sge *sge, u32 length) { u32 len = sge->length; if (len > length) len = length; if (len > sge->sge_length) len = sge->sge_length; return len; } static inline void rvt_update_sge(struct rvt_sge_state *ss, u32 length, bool release) { struct rvt_sge *sge = &ss->sge; sge->vaddr += length; sge->length -= length; sge->sge_length -= length; if (sge->sge_length == 0) { if (release) rvt_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { if (++sge->n >= RVT_SEGSZ) { if (++sge->m >= sge->mr->mapsz) return; sge->n = 0; } sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; sge->length = sge->mr->map[sge->m]->segs[sge->n].length; } } static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length, bool release) { struct rvt_sge *sge = &ss->sge; while (length) { u32 len = rvt_get_sge_length(sge, length); WARN_ON_ONCE(len == 0); rvt_update_sge(ss, len, release); length -= len; } } bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey); bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey); #endif /* DEF_RDMAVT_INCMRH */ PK ! 
�M:�" " rdma/rdma_counter.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2019 Mellanox Technologies. All rights reserved. */ #ifndef _RDMA_COUNTER_H_ #define _RDMA_COUNTER_H_ #include <linux/mutex.h> #include <linux/pid_namespace.h> #include <rdma/restrack.h> #include <rdma/rdma_netlink.h> struct ib_device; struct ib_qp; struct auto_mode_param { int qp_type; }; struct rdma_counter_mode { enum rdma_nl_counter_mode mode; enum rdma_nl_counter_mask mask; struct auto_mode_param param; }; struct rdma_port_counter { struct rdma_counter_mode mode; struct rdma_hw_stats *hstats; unsigned int num_counters; struct mutex lock; }; struct rdma_counter { struct rdma_restrack_entry res; struct ib_device *device; uint32_t id; struct kref kref; struct rdma_counter_mode mode; struct mutex lock; struct rdma_hw_stats *stats; u32 port; }; void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mask mask, struct netlink_ext_ack *extack); int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port); int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); int rdma_counter_query_stats(struct rdma_counter *counter); u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index); int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, u32 qp_num, u32 *counter_id); int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); #endif /* _RDMA_COUNTER_H_ */ PK ! N�0�� � rdma/peer_mem.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014-2020, Mellanox Technologies. All rights reserved. 
*/ #ifndef RDMA_PEER_MEM_H #define RDMA_PEER_MEM_H #include <linux/scatterlist.h> #define IB_PEER_MEMORY_NAME_MAX 64 #define IB_PEER_MEMORY_VER_MAX 16 /* * Prior versions used a void * for core_context; at some point this was * switched to use u64. Be careful if compiling this as 32 bit. To help, the * value of core_context is limited to u32, so it should work OK despite the * type change. */ #define PEER_MEM_U64_CORE_CONTEXT struct device; /** * struct peer_memory_client - registration information for user virtual * memory handlers * * The peer_memory_client scheme allows a driver to register with the ib_umem * system that it has the ability to understand user virtual address ranges * that are not compatible with get_user_pages(). For instance VMAs created * with io_remap_pfn_range(), or other driver special VMA. * * For ranges the interface understands it can provide a DMA mapped sg_table * for use by the ib_umem, allowing user virtual ranges that cannot be * supported by get_user_pages() to be used as umems. */ struct peer_memory_client { char name[IB_PEER_MEMORY_NAME_MAX]; char version[IB_PEER_MEMORY_VER_MAX]; /** * acquire - Begin working with a user space virtual address range * * @addr - Virtual address to be checked whether belongs to peer. * @size - Length of the virtual memory area starting at addr. * @peer_mem_private_data - Obsolete, always NULL * @peer_mem_name - Obsolete, always NULL * @client_context - Returns an opaque value for this acquire use in * other APIs * * Returns 1 if the peer_memory_client supports the entire virtual * address range, 0 or -ERRNO otherwise. If 1 is returned then * release() will be called to release the acquire(). */ int (*acquire)(unsigned long addr, size_t size, void *peer_mem_private_data, char *peer_mem_name, void **client_context); /** * get_pages - Fill in the first part of a sg_table for a virtual * address range * * @addr - Virtual address to be checked whether belongs to peer. 
* @size - Length of the virtual memory area starting at addr. * @write - Always 1 * @force - 1 if write is required * @sg_head - Obsolete, always NULL * @client_context - Value returned by acquire() * @core_context - Value to be passed to invalidate_peer_memory for * this get * * addr/size are passed as the raw virtual address range requested by * the user; the range is not aligned to any page size. get_pages() is * always followed by dma_map(). * * Upon return the caller can call the invalidate_callback(). * * Returns 0 on success, -ERRNO on failure. After success put_pages() * will be called to return the pages. */ int (*get_pages)(unsigned long addr, size_t size, int write, int force, struct sg_table *sg_head, void *client_context, u64 core_context); /** * dma_map - Create a DMA mapped sg_table * * @sg_head - The sg_table to allocate * @client_context - Value returned by acquire() * @dma_device - The device that will be doing DMA from these addresses * @dmasync - Obsolete, always 0 * @nmap - Returns the number of dma mapped entries in the sg_head * * Must be called after get_pages(). This must fill in the sg_head with * DMA mapped SGLs for dma_device. Each SGL start and end must meet a * minimum alignment of at least PAGE_SIZE, though individual sgls can * be multiples of PAGE_SIZE, in any mixture. Since the user virtual * address/size are not page aligned, the implementation must increase * it to the logical alignment when building the SGLs. * * Returns 0 on success, -ERRNO on failure. After success dma_unmap() * will be called to unmap the pages. On failure sg_head must be left * untouched or point to a valid sg_table. 
*/ int (*dma_map)(struct sg_table *sg_head, void *client_context, struct device *dma_device, int dmasync, int *nmap); /** * dma_unmap - Unmap a DMA mapped sg_table * * @sg_head - The sg_table to unmap * @client_context - Value returned by acquire() * @dma_device - The device that will be doing DMA from these addresses * * sg_head will not be touched after this function returns. * * Must return 0. */ int (*dma_unmap)(struct sg_table *sg_head, void *client_context, struct device *dma_device); /** * put_pages - Unpin an SGL * * @sg_head - The sg_table to unpin * @client_context - Value returned by acquire() * * sg_head must be freed on return. */ void (*put_pages)(struct sg_table *sg_head, void *client_context); /* Client should always return PAGE_SIZE */ unsigned long (*get_page_size)(void *client_context); /** * release - Undo acquire * * @client_context - Value returned by acquire() * * If acquire() returns 1 then release() must be called. All * get_pages() and dma_map()'s must be undone before calling this * function. */ void (*release)(void *client_context); }; /* Flag bits for peer_memory_client_ex.flags. */ enum { PEER_MEM_INVALIDATE_UNMAPS = 1 << 0, }; /* Extended registration record: the base client followed by ex_size and flags. */ struct peer_memory_client_ex { struct peer_memory_client client; size_t ex_size; u32 flags; }; /* * If invalidate_callback() is non-NULL then the client will only support * umems which can be invalidated. The caller may call the * invalidate_callback() after acquire(); on return, the range will no longer * have DMA active, and release() will have been called. * * Note: The implementation locking must ensure that get_pages(), and * dma_map() do not have locking dependencies with invalidate_callback(). The * ib_core will wait until any concurrent get_pages() or dma_map() completes * before returning. * * Similarly, this can call dma_unmap(), put_pages() and release() from within * the callback, or will wait for another thread doing those operations to * complete. * * For these reasons the user of invalidate_callback() must be careful with * locking. 
*/ typedef int (*invalidate_peer_memory)(void *reg_handle, u64 core_context); void * ib_register_peer_memory_client(const struct peer_memory_client *peer_client, invalidate_peer_memory *invalidate_callback); void ib_unregister_peer_memory_client(void *reg_handle); #endif PK ! �G%� � rdma/ib_umem_odp.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Mellanox Technologies. All rights reserved. */ #ifndef IB_UMEM_ODP_H #define IB_UMEM_ODP_H #include <rdma/ib_umem.h> #include <rdma/ib_verbs.h> struct ib_umem_odp { struct ib_umem umem; struct mmu_interval_notifier notifier; struct pid *tgid; /* An array of the pfns included in the on-demand paging umem. */ unsigned long *pfn_list; /* * An array with DMA addresses mapped for pfns in pfn_list. * The lower two bits designate access permissions. * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. */ dma_addr_t *dma_list; /* * The umem_mutex protects the page_list and dma_list fields of an ODP * umem, allowing only a single thread to map/unmap pages. The mutex * also protects access to the mmu notifier counters. */ struct mutex umem_mutex; void *private; /* for the HW driver to use. */ int npages; /* * An implicit odp umem cannot be DMA mapped, has 0 length, and serves * only as an anchor for the driver to hold onto the per_mm. FIXME: * This should be removed and drivers should work with the per_mm * directly. */ bool is_implicit_odp; unsigned int page_shift; }; static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) { return container_of(umem, struct ib_umem_odp, umem); } /* Returns the first page of an ODP umem. */ static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) { return umem_odp->notifier.interval_tree.start; } /* Returns the address of the page after the last one of an ODP umem. 
*/ static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) { return umem_odp->notifier.interval_tree.last + 1; } static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) { return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >> umem_odp->page_shift; } /* * The lower 2 bits of the DMA address signal the R/W permissions for * the entry. To upgrade the permissions, provide the appropriate * bitmask to the map_dma_pages function. * * Be aware that upgrading a mapped address might result in change of * the DMA address for the page. */ #define ODP_READ_ALLOWED_BIT (1<<0ULL) #define ODP_WRITE_ALLOWED_BIT (1<<1ULL) #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_umem_odp * ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, int access, const struct mmu_interval_notifier_ops *ops); struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, int access); struct ib_umem_odp * ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, size_t size, const struct mmu_interval_notifier_ops *ops); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bcnt, u64 access_mask, bool fault); void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline struct ib_umem_odp * ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size, int access, const struct mmu_interval_notifier_ops *ops) { return ERR_PTR(-EINVAL); } static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #endif /* IB_UMEM_ODP_H */ PK ! ���� � rdma/ib_pack.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. 
*/ #ifndef IB_PACK_H #define IB_PACK_H #include <rdma/ib_verbs.h> #include <uapi/linux/if_ether.h> enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, IB_IP4_BYTES = 20, IB_UDP_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8, IB_EXT_ATOMICETH_BYTES = 28, IB_EXT_XRC_BYTES = 4, IB_ICRC_BYTES = 4 }; struct ib_field { size_t struct_offset_bytes; size_t struct_size_bytes; int offset_words; int offset_bits; int size_bits; char *field_name; }; #define RESERVED \ .field_name = "reserved" /* * This macro cleans up the definitions of constants for BTH opcodes. * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives * the correct value. * * In short, user code should use the constants defined using the * macro rather than worrying about adding together other constants. */ #define IB_OPCODE(transport, op) \ IB_OPCODE_ ## transport ## _ ## op = \ IB_OPCODE_ ## transport + IB_OPCODE_ ## op enum { /* transport types -- just used to define real constants */ IB_OPCODE_RC = 0x00, IB_OPCODE_UC = 0x20, IB_OPCODE_RD = 0x40, IB_OPCODE_UD = 0x60, /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ IB_OPCODE_CNP = 0x80, /* Manufacturer specific */ IB_OPCODE_MSP = 0xe0, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, IB_OPCODE_SEND_MIDDLE = 0x01, IB_OPCODE_SEND_LAST = 0x02, IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, IB_OPCODE_SEND_ONLY = 0x04, IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, IB_OPCODE_RDMA_WRITE_FIRST = 0x06, IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, IB_OPCODE_RDMA_WRITE_LAST = 0x08, IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, IB_OPCODE_RDMA_READ_REQUEST = 0x0c, IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, IB_OPCODE_ACKNOWLEDGE = 0x11, 
IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, IB_OPCODE_COMPARE_SWAP = 0x13, IB_OPCODE_FETCH_ADD = 0x14, /* opcode 0x15 is reserved */ IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ /* RC */ IB_OPCODE(RC, SEND_FIRST), IB_OPCODE(RC, SEND_MIDDLE), IB_OPCODE(RC, SEND_LAST), IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(RC, SEND_ONLY), IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_WRITE_FIRST), IB_OPCODE(RC, RDMA_WRITE_MIDDLE), IB_OPCODE(RC, RDMA_WRITE_LAST), IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_WRITE_ONLY), IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_READ_REQUEST), IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), IB_OPCODE(RC, ACKNOWLEDGE), IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RC, COMPARE_SWAP), IB_OPCODE(RC, FETCH_ADD), IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), /* UC */ IB_OPCODE(UC, SEND_FIRST), IB_OPCODE(UC, SEND_MIDDLE), IB_OPCODE(UC, SEND_LAST), IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(UC, SEND_ONLY), IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(UC, RDMA_WRITE_FIRST), IB_OPCODE(UC, RDMA_WRITE_MIDDLE), IB_OPCODE(UC, RDMA_WRITE_LAST), IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(UC, RDMA_WRITE_ONLY), IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), /* RD */ IB_OPCODE(RD, SEND_FIRST), IB_OPCODE(RD, SEND_MIDDLE), IB_OPCODE(RD, SEND_LAST), IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(RD, SEND_ONLY), IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(RD, RDMA_WRITE_FIRST), IB_OPCODE(RD, RDMA_WRITE_MIDDLE), IB_OPCODE(RD, RDMA_WRITE_LAST), IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(RD, RDMA_WRITE_ONLY), IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), IB_OPCODE(RD, 
RDMA_READ_REQUEST), IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), IB_OPCODE(RD, ACKNOWLEDGE), IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RD, COMPARE_SWAP), IB_OPCODE(RD, FETCH_ADD), /* UD */ IB_OPCODE(UD, SEND_ONLY), IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) }; enum { IB_LNH_RAW = 0, IB_LNH_IP = 1, IB_LNH_IBA_LOCAL = 2, IB_LNH_IBA_GLOBAL = 3 }; struct ib_unpacked_lrh { u8 virtual_lane; u8 link_version; u8 service_level; u8 link_next_header; __be16 destination_lid; __be16 packet_length; __be16 source_lid; }; struct ib_unpacked_grh { u8 ip_version; u8 traffic_class; __be32 flow_label; __be16 payload_length; u8 next_header; u8 hop_limit; union ib_gid source_gid; union ib_gid destination_gid; }; struct ib_unpacked_bth { u8 opcode; u8 solicited_event; u8 mig_req; u8 pad_count; u8 transport_header_version; __be16 pkey; __be32 destination_qpn; u8 ack_req; __be32 psn; }; struct ib_unpacked_deth { __be32 qkey; __be32 source_qpn; }; struct ib_unpacked_eth { u8 dmac_h[4]; u8 dmac_l[2]; u8 smac_h[2]; u8 smac_l[4]; __be16 type; }; struct ib_unpacked_ip4 { u8 ver; u8 hdr_len; u8 tos; __be16 tot_len; __be16 id; __be16 frag_off; u8 ttl; u8 protocol; __sum16 check; __be32 saddr; __be32 daddr; }; struct ib_unpacked_udp { __be16 sport; __be16 dport; __be16 length; __be16 csum; }; struct ib_unpacked_vlan { __be16 tag; __be16 type; }; struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; int eth_present; struct ib_unpacked_eth eth; int vlan_present; struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; int ipv4_present; struct ib_unpacked_ip4 ip4; int udp_present; struct ib_unpacked_udp udp; struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; int immediate_present; __be32 immediate_data; }; void ib_pack(const struct ib_field *desc, int desc_len, void *structure, void *buf); void ib_unpack(const struct ib_field *desc, int desc_len, 
void *buf, void *structure); __sum16 ib_ud_ip4_csum(struct ib_ud_header *header); int ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, int vlan_present, int grh_present, int ip_version, int udp_present, int immediate_present, struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, void *buf); int ib_ud_header_unpack(void *buf, struct ib_ud_header *header); #endif /* IB_PACK_H */ PK ! aZM�3 �3 rdma/ibta_vol1_c12.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. * * This file is IBTA volume 1, chapter 12 declarations: * CHAPTER 12: COMMUNICATION MANAGEMENT */ #ifndef _IBTA_VOL1_C12_H_ #define _IBTA_VOL1_C12_H_ #include <rdma/iba.h> #define CM_FIELD_BLOC(field_struct, byte_offset, bits_offset, width) \ IBA_FIELD_BLOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), bits_offset, \ width) #define CM_FIELD8_LOC(field_struct, byte_offset, width) \ IBA_FIELD8_LOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), width) #define CM_FIELD16_LOC(field_struct, byte_offset, width) \ IBA_FIELD16_LOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), width) #define CM_FIELD32_LOC(field_struct, byte_offset, width) \ IBA_FIELD32_LOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), width) #define CM_FIELD64_LOC(field_struct, byte_offset) \ IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr))) #define CM_FIELD_MLOC(field_struct, byte_offset, width, type) \ IBA_FIELD_MLOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), width, type) #define CM_STRUCT(field_struct, total_len) \ field_struct \ { \ struct ib_mad_hdr hdr; \ u32 _data[(total_len) / 32 + \ BUILD_BUG_ON_ZERO((total_len) % 32 != 0)]; \ } /* Table 106 REQ Message Contents */ #define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) #define CM_REQ_VENDOR_ID CM_FIELD32_LOC(struct cm_req_msg, 5, 24) #define 
CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8) #define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16) #define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32) #define CM_REQ_LOCAL_QPN CM_FIELD32_LOC(struct cm_req_msg, 32, 24) #define CM_REQ_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_req_msg, 35, 8) #define CM_REQ_LOCAL_EECN CM_FIELD32_LOC(struct cm_req_msg, 36, 24) #define CM_REQ_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_req_msg, 39, 8) #define CM_REQ_REMOTE_EECN CM_FIELD32_LOC(struct cm_req_msg, 40, 24) #define CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT \ CM_FIELD8_LOC(struct cm_req_msg, 43, 5) #define CM_REQ_TRANSPORT_SERVICE_TYPE CM_FIELD_BLOC(struct cm_req_msg, 43, 5, 2) #define CM_REQ_END_TO_END_FLOW_CONTROL \ CM_FIELD_BLOC(struct cm_req_msg, 43, 7, 1) #define CM_REQ_STARTING_PSN CM_FIELD32_LOC(struct cm_req_msg, 44, 24) #define CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 47, 5) #define CM_REQ_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 47, 5, 3) #define CM_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_req_msg, 48, 16) #define CM_REQ_PATH_PACKET_PAYLOAD_MTU CM_FIELD8_LOC(struct cm_req_msg, 50, 4) #define CM_REQ_RDC_EXISTS CM_FIELD_BLOC(struct cm_req_msg, 50, 4, 1) #define CM_REQ_RNR_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 50, 5, 3) #define CM_REQ_MAX_CM_RETRIES CM_FIELD8_LOC(struct cm_req_msg, 51, 4) #define CM_REQ_SRQ CM_FIELD_BLOC(struct cm_req_msg, 51, 4, 1) #define CM_REQ_EXTENDED_TRANSPORT_TYPE \ CM_FIELD_BLOC(struct cm_req_msg, 51, 5, 3) #define CM_REQ_PRIMARY_LOCAL_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 52, 16) #define CM_REQ_PRIMARY_REMOTE_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 54, 16) #define CM_REQ_PRIMARY_LOCAL_PORT_GID \ CM_FIELD_MLOC(struct cm_req_msg, 56, 128, union ib_gid) #define CM_REQ_PRIMARY_REMOTE_PORT_GID \ CM_FIELD_MLOC(struct cm_req_msg, 72, 128, union ib_gid) #define CM_REQ_PRIMARY_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 88, 20) #define CM_REQ_PRIMARY_PACKET_RATE 
CM_FIELD_BLOC(struct cm_req_msg, 91, 2, 6) #define CM_REQ_PRIMARY_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 92, 8) #define CM_REQ_PRIMARY_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 93, 8) #define CM_REQ_PRIMARY_SL CM_FIELD8_LOC(struct cm_req_msg, 94, 4) #define CM_REQ_PRIMARY_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_req_msg, 94, 4, 1) #define CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 95, 5) #define CM_REQ_ALTERNATE_LOCAL_PORT_LID \ CM_FIELD16_LOC(struct cm_req_msg, 96, 16) #define CM_REQ_ALTERNATE_REMOTE_PORT_LID \ CM_FIELD16_LOC(struct cm_req_msg, 98, 16) #define CM_REQ_ALTERNATE_LOCAL_PORT_GID \ CM_FIELD_MLOC(struct cm_req_msg, 100, 128, union ib_gid) #define CM_REQ_ALTERNATE_REMOTE_PORT_GID \ CM_FIELD_MLOC(struct cm_req_msg, 116, 128, union ib_gid) #define CM_REQ_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 132, 20) #define CM_REQ_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 135, 2, 6) #define CM_REQ_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 136, 8) #define CM_REQ_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 137, 8) #define CM_REQ_ALTERNATE_SL CM_FIELD8_LOC(struct cm_req_msg, 138, 4) #define CM_REQ_ALTERNATE_SUBNET_LOCAL \ CM_FIELD_BLOC(struct cm_req_msg, 138, 4, 1) #define CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT \ CM_FIELD8_LOC(struct cm_req_msg, 139, 5) #define CM_REQ_SAP_SUPPORTED CM_FIELD_BLOC(struct cm_req_msg, 139, 5, 1) #define CM_REQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_req_msg, 140, 736, void) CM_STRUCT(struct cm_req_msg, 140 * 8 + 736); /* Table 107 MRA Message Contents */ #define CM_MRA_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 0, 32) #define CM_MRA_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 4, 32) #define CM_MRA_MESSAGE_MRAED CM_FIELD8_LOC(struct cm_mra_msg, 8, 2) #define CM_MRA_SERVICE_TIMEOUT CM_FIELD8_LOC(struct cm_mra_msg, 9, 5) #define CM_MRA_PRIVATE_DATA CM_FIELD_MLOC(struct cm_mra_msg, 10, 1776, void) CM_STRUCT(struct cm_mra_msg, 10 * 8 + 1776); /* Table 108 REJ Message 
Contents */ #define CM_REJ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 0, 32) #define CM_REJ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 4, 32) #define CM_REJ_MESSAGE_REJECTED CM_FIELD8_LOC(struct cm_rej_msg, 8, 2) #define CM_REJ_REJECTED_INFO_LENGTH CM_FIELD8_LOC(struct cm_rej_msg, 9, 7) #define CM_REJ_REASON CM_FIELD16_LOC(struct cm_rej_msg, 10, 16) #define CM_REJ_ARI CM_FIELD_MLOC(struct cm_rej_msg, 12, 576, void) #define CM_REJ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rej_msg, 84, 1184, void) CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184); /* Table 110 REP Message Contents */ #define CM_REP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 0, 32) #define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32) #define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32) #define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24) #define CM_REP_VENDOR_ID_H CM_FIELD8_LOC(struct cm_rep_msg, 15, 8) #define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24) #define CM_REP_VENDOR_ID_M CM_FIELD8_LOC(struct cm_rep_msg, 19, 8) #define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24) #define CM_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_rep_msg, 23, 8) #define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8) #define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8) #define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5) #define CM_REP_FAILOVER_ACCEPTED CM_FIELD_BLOC(struct cm_rep_msg, 26, 5, 2) #define CM_REP_END_TO_END_FLOW_CONTROL \ CM_FIELD_BLOC(struct cm_rep_msg, 26, 7, 1) #define CM_REP_RNR_RETRY_COUNT CM_FIELD8_LOC(struct cm_rep_msg, 27, 3) #define CM_REP_SRQ CM_FIELD_BLOC(struct cm_rep_msg, 27, 3, 1) #define CM_REP_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_rep_msg, 28) #define CM_REP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rep_msg, 36, 1568, void) CM_STRUCT(struct cm_rep_msg, 36 * 8 + 1568); /* Table 111 RTU Message Contents */ #define CM_RTU_LOCAL_COMM_ID 
CM_FIELD32_LOC(struct cm_rtu_msg, 0, 32) #define CM_RTU_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 4, 32) #define CM_RTU_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rtu_msg, 8, 1792, void) CM_STRUCT(struct cm_rtu_msg, 8 * 8 + 1792); /* Table 112 DREQ Message Contents */ #define CM_DREQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 0, 32) #define CM_DREQ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 4, 32) #define CM_DREQ_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_dreq_msg, 8, 24) #define CM_DREQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_dreq_msg, 12, 1760, void) CM_STRUCT(struct cm_dreq_msg, 12 * 8 + 1760); /* Table 113 DREP Message Contents */ #define CM_DREP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 0, 32) #define CM_DREP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 4, 32) #define CM_DREP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_drep_msg, 8, 1792, void) CM_STRUCT(struct cm_drep_msg, 8 * 8 + 1792); /* Table 115 LAP Message Contents */ #define CM_LAP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 0, 32) #define CM_LAP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 4, 32) #define CM_LAP_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_lap_msg, 12, 24) #define CM_LAP_REMOTE_CM_RESPONSE_TIMEOUT \ CM_FIELD8_LOC(struct cm_lap_msg, 15, 5) #define CM_LAP_ALTERNATE_LOCAL_PORT_LID \ CM_FIELD16_LOC(struct cm_lap_msg, 20, 16) #define CM_LAP_ALTERNATE_REMOTE_PORT_LID \ CM_FIELD16_LOC(struct cm_lap_msg, 22, 16) #define CM_LAP_ALTERNATE_LOCAL_PORT_GID \ CM_FIELD_MLOC(struct cm_lap_msg, 24, 128, union ib_gid) #define CM_LAP_ALTERNATE_REMOTE_PORT_GID \ CM_FIELD_MLOC(struct cm_lap_msg, 40, 128, union ib_gid) #define CM_LAP_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_lap_msg, 56, 20) #define CM_LAP_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_lap_msg, 59, 8) #define CM_LAP_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_lap_msg, 60, 8) #define CM_LAP_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_lap_msg, 61, 2, 6) #define CM_LAP_ALTERNATE_SL CM_FIELD8_LOC(struct cm_lap_msg, 62, 
4) #define CM_LAP_ALTERNATE_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_lap_msg, 62, 4, 1) #define CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT \ CM_FIELD8_LOC(struct cm_lap_msg, 63, 5) #define CM_LAP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_lap_msg, 64, 1344, void) CM_STRUCT(struct cm_lap_msg, 64 * 8 + 1344); /* Table 116 APR Message Contents */ #define CM_APR_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 0, 32) #define CM_APR_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 4, 32) #define CM_APR_ADDITIONAL_INFORMATION_LENGTH \ CM_FIELD8_LOC(struct cm_apr_msg, 8, 8) #define CM_APR_AR_STATUS CM_FIELD8_LOC(struct cm_apr_msg, 9, 8) #define CM_APR_ADDITIONAL_INFORMATION \ CM_FIELD_MLOC(struct cm_apr_msg, 12, 576, void) #define CM_APR_PRIVATE_DATA CM_FIELD_MLOC(struct cm_apr_msg, 84, 1184, void) CM_STRUCT(struct cm_apr_msg, 84 * 8 + 1184); /* Table 119 SIDR_REQ Message Contents */ #define CM_SIDR_REQ_REQUESTID CM_FIELD32_LOC(struct cm_sidr_req_msg, 0, 32) #define CM_SIDR_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_sidr_req_msg, 4, 16) #define CM_SIDR_REQ_SERVICEID CM_FIELD64_LOC(struct cm_sidr_req_msg, 8) #define CM_SIDR_REQ_PRIVATE_DATA \ CM_FIELD_MLOC(struct cm_sidr_req_msg, 16, 1728, void) CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728); /* Table 120 SIDR_REP Message Contents */ #define CM_SIDR_REP_REQUESTID CM_FIELD32_LOC(struct cm_sidr_rep_msg, 0, 32) #define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8) #define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH \ CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8) #define CM_SIDR_REP_VENDOR_ID_H CM_FIELD16_LOC(struct cm_sidr_rep_msg, 6, 16) #define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24) #define CM_SIDR_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_sidr_rep_msg, 11, 8) #define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12) #define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32) #define CM_SIDR_REP_ADDITIONAL_INFORMATION \ CM_FIELD_MLOC(struct cm_sidr_rep_msg, 24, 576, void) #define 
CM_SIDR_REP_PRIVATE_DATA \ CM_FIELD_MLOC(struct cm_sidr_rep_msg, 96, 1088, void) CM_STRUCT(struct cm_sidr_rep_msg, 96 * 8 + 1088); #endif /* _IBTA_VOL1_C12_H_ */ PK ! �0�I� � rdma/ib_smi.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. */ #ifndef IB_SMI_H #define IB_SMI_H #include <rdma/ib_mad.h> #define IB_SMP_DATA_SIZE 64 #define IB_SMP_MAX_PATH_HOPS 64 struct ib_smp { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; u8 hop_ptr; u8 hop_cnt; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; __be64 mkey; __be16 dr_slid; __be16 dr_dlid; u8 reserved[28]; u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; } __packed; #define IB_SMP_DIRECTION cpu_to_be16(0x8000) /* Subnet management attributes */ #define IB_SMP_ATTR_NOTICE cpu_to_be16(0x0002) #define IB_SMP_ATTR_NODE_DESC cpu_to_be16(0x0010) #define IB_SMP_ATTR_NODE_INFO cpu_to_be16(0x0011) #define IB_SMP_ATTR_SWITCH_INFO cpu_to_be16(0x0012) #define IB_SMP_ATTR_GUID_INFO cpu_to_be16(0x0014) #define IB_SMP_ATTR_PORT_INFO cpu_to_be16(0x0015) #define IB_SMP_ATTR_PKEY_TABLE cpu_to_be16(0x0016) #define IB_SMP_ATTR_SL_TO_VL_TABLE cpu_to_be16(0x0017) #define IB_SMP_ATTR_VL_ARB_TABLE cpu_to_be16(0x0018) #define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cpu_to_be16(0x0019) #define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cpu_to_be16(0x001A) #define IB_SMP_ATTR_MCAST_FORWARD_TABLE cpu_to_be16(0x001B) #define IB_SMP_ATTR_SM_INFO cpu_to_be16(0x0020) #define IB_SMP_ATTR_VENDOR_DIAG cpu_to_be16(0x0030) #define IB_SMP_ATTR_LED_INFO cpu_to_be16(0x0031) #define IB_SMP_ATTR_VENDOR_MASK cpu_to_be16(0xFF00) struct ib_port_info { 
__be64 mkey; __be64 gid_prefix; __be16 lid; __be16 sm_lid; __be32 cap_mask; __be16 diag_code; __be16 mkey_lease_period; u8 local_port_num; u8 link_width_enabled; u8 link_width_supported; u8 link_width_active; u8 linkspeed_portstate; /* 4 bits, 4 bits */ u8 portphysstate_linkdown; /* 4 bits, 4 bits */ u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ u8 vlcap_inittype; /* 4 bits, 4 bits */ u8 vl_high_limit; u8 vl_arb_high_cap; u8 vl_arb_low_cap; u8 inittypereply_mtucap; /* 4 bits, 4 bits */ u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ __be16 mkey_violations; __be16 pkey_violations; __be16 qkey_violations; u8 guid_cap; u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ u8 resv_resptimevalue; /* 3 bits, 5 bits */ u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ __be16 max_credit_hint; u8 resv; u8 link_roundtrip_latency[3]; }; struct ib_node_info { u8 base_version; u8 class_version; u8 node_type; u8 num_ports; __be64 sys_guid; __be64 node_guid; __be64 port_guid; __be16 partition_cap; __be16 device_id; __be32 revision; u8 local_port_num; u8 vendor_id[3]; } __packed; struct ib_vl_weight_elem { u8 vl; /* IB: VL is low 4 bits, upper 4 bits reserved */ /* OPA: VL is low 5 bits, upper 3 bits reserved */ u8 weight; }; static inline u8 ib_get_smp_direction(struct ib_smp *smp) { return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); } /* * SM Trap/Notice numbers */ #define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) #define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) #define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) #define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) #define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) #define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) #define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) #define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) /* * Other local changes flags (trap 144). 
*/ #define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ #define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ #define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 /* * M_Key volation flags in dr_trunc_hop (trap 256). */ #define IB_NOTICE_TRAP_DR_NOTICE 0x80 #define IB_NOTICE_TRAP_DR_TRUNC 0x40 #endif /* IB_SMI_H */ PK ! p�&�8 �8 rdma/rdma_vt.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2019 Intel Corporation. */ #ifndef DEF_RDMA_VT_H #define DEF_RDMA_VT_H /* * Structure that low level drivers will populate in order to register with the * rdmavt layer. */ #include <linux/spinlock.h> #include <linux/list.h> #include <linux/hash.h> #include <rdma/ib_verbs.h> #include <rdma/ib_mad.h> #include <rdma/rdmavt_mr.h> #define RVT_MAX_PKEY_VALUES 16 #define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ #define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/ #define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ struct trap_list { u32 list_len; struct list_head list; }; struct rvt_qp; struct rvt_qpn_table; struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct rb_root mcast_tree; spinlock_t lock; /* protect changes in this struct */ /* non-zero when timer is set */ unsigned long mkey_lease_timeout; unsigned long trap_timeout; __be64 gid_prefix; /* in network order */ __be64 mkey; u64 tid; u32 port_cap_flags; u16 port_cap3_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; u16 pma_tag; u16 mkey_lease_period; u32 sm_lid; u8 sm_sl; u8 mkeyprot; u8 subnet_timeout; u8 vl_high_limit; /* * Driver is expected to keep these up to date. These * counters are informational only and not required to be * completely accurate. 
*/ u64 n_rc_resends; u64 n_seq_naks; u64 n_rdma_seq; u64 n_rnr_naks; u64 n_other_naks; u64 n_loop_pkts; u64 n_pkt_drops; u64 n_vl15_dropped; u64 n_rc_timeouts; u64 n_dmawait; u64 n_unaligned; u64 n_rc_dupreq; u64 n_rc_seqnak; u64 n_rc_crwaits; u16 pkey_violations; u16 qkey_violations; u16 mkey_violations; /* Hot-path per CPU counters to avoid cacheline trading to update */ u64 z_rc_acks; u64 z_rc_qacks; u64 z_rc_delayed_comp; u64 __percpu *rc_acks; u64 __percpu *rc_qacks; u64 __percpu *rc_delayed_comp; void *priv; /* driver private data */ /* * The pkey table is allocated and maintained by the driver. Drivers * need to have access to this before registering with rdmav. However * rdmavt will need access to it so drivers need to provide this during * the attach port API call. */ u16 *pkey_table; struct rvt_ah *sm_ah; /* * Keep a list of traps that have not been repressed. They will be * resent based on trap_timer. */ struct trap_list trap_lists[RVT_MAX_TRAP_LISTS]; struct timer_list trap_timer; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ #define RVT_SGE_COPY_MEMCPY 0 #define RVT_SGE_COPY_CACHELESS 1 #define RVT_SGE_COPY_ADAPTIVE 2 /* * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { struct ib_device_attr props; /* * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. 
*/ unsigned int lkey_table_size; unsigned int qp_table_size; unsigned int sge_copy_mode; unsigned int wss_threshold; unsigned int wss_clean_period; int qpn_start; int qpn_inc; int qpn_res_start; int qpn_res_end; int nports; int npkeys; int node; int psn_mask; int psn_shift; int psn_modify_mask; u32 core_cap_flags; u32 max_mad_size; u8 qos_shift; u8 max_rdma_atomic; u8 extra_rdma_atomic; u8 reserved_operations; }; /* User context */ struct rvt_ucontext { struct ib_ucontext ibucontext; }; /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; bool user; }; /* Address handle */ struct rvt_ah { struct ib_ah ibah; struct rdma_ah_attr attr; u8 vl; u8 log_pmtu; }; /* * This structure is used by rvt_mmap() to validate an offset * when an mmap() request is made. The vm_area_struct then uses * this as its vm_private_data. */ struct rvt_mmap_info { struct list_head pending_mmaps; struct ib_ucontext *context; void *obj; __u64 offset; struct kref ref; u32 size; }; /* memory working set size */ struct rvt_wss { unsigned long *entries; atomic_t total_count; atomic_t clean_counter; atomic_t clean_entry; int threshold; int num_entries; long pages_mask; unsigned int clean_period; }; struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { /* * Which functions are required depends on which verbs rdmavt is * providing and which verbs the driver is overriding. See * check_support() for details. */ /* hot path calldowns in a single cacheline */ /* * Give the driver a notice that there is send work to do. It is up to * the driver to generally push the packets out, this just queues the * work with the driver. There are two variants here. The no_lock * version requires the s_lock not to be held. The other assumes the * s_lock is held. */ bool (*schedule_send)(struct rvt_qp *qp); bool (*schedule_send_no_lock)(struct rvt_qp *qp); /* * Driver specific work request setup and checking. 
* This function is allowed to perform any setup, checks, or * adjustments required to the SWQE in order to be usable by * underlying protocols. This includes private data structure * allocations. */ int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send); /* * Sometimes rdmavt needs to kick the driver's send progress. That is * done by this call back. */ void (*do_send)(struct rvt_qp *qp); /* * Returns a pointer to the underlying hardware's PCI device. This is * used to display information as to what hardware is being referenced * in an output message */ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); /* * Allocate a private queue pair data structure for driver specific * information which is opaque to rdmavt. Errors are returned via * ERR_PTR(err). The driver is free to return NULL or a valid * pointer. */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* * Init a structure allocated with qp_priv_alloc(). This should be * called after all qp fields have been initialized in rdmavt. */ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_init_attr *init_attr); /* * Free the driver's private qp structure. */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* * Inform the driver the particular qp in question has been reset so * that it can clean up anything it needs to. */ void (*notify_qp_reset)(struct rvt_qp *qp); /* * Get a path mtu from the driver based on qp attributes. */ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); /* * Notify driver that it needs to flush any outstanding IO requests that * are waiting on a qp. */ void (*flush_qp_waiters)(struct rvt_qp *qp); /* * Notify driver to stop its queue of sending packets. Nothing else * should be posted to the queue pair after this has been called. 
*/ void (*stop_send_queue)(struct rvt_qp *qp); /* * Have the driver drain any in progress operations */ void (*quiesce_qp)(struct rvt_qp *qp); /* * Inform the driver a qp has went to error state. */ void (*notify_error_qp)(struct rvt_qp *qp); /* * Get an MTU for a qp. */ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); /* * Convert an mtu to a path mtu */ int (*mtu_to_path_mtu)(u32 mtu); /* * Get the guid of a port in big endian byte order */ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); /* * Query driver for the state of the port. */ int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num, struct ib_port_attr *props); /* * Tell driver to shutdown a port */ int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num); /* Tell driver to send a trap for changed port capabilities */ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num); /* * The following functions can be safely ignored completely. Any use of * these is checked for NULL before blindly calling. Rdmavt should also * be functional if drivers omit these. */ /* Called to inform the driver that all qps should now be freed. 
*/ unsigned (*free_all_qps)(struct rvt_dev_info *rdi); /* Driver specific AH validation */ int (*check_ah)(struct ib_device *, struct rdma_ah_attr *); /* Inform the driver a new AH has been created */ void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *, struct rvt_ah *); /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u32 port_num); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); /* Driver specific QP modification/notification-of */ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); /* Notify driver a mad agent has been created */ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); /* Notify driver a mad agent has been removed */ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); /* Notify driver to restart rc */ void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait); /* Get and return CPU to pin CQ processing thread */ int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect); }; struct rvt_dev_info { struct ib_device ibdev; /* Keep this first. Nothing above here */ /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. * * The driver will also be responsible for filling in certain members of * dparms.props. The driver needs to fill in dparms exactly as it would * want values reported to a ULP. This will be returned to the caller * in rdmavt's device. The driver should also therefore refrain from * modifying this directly after registration with rdmavt. 
*/ /* Driver specific properties */ struct rvt_driver_params dparms; /* post send table */ const struct rvt_operation_params *post_parms; /* opcode translation table */ const enum ib_wc_opcode *wc_opcode; /* Driver specific helper functions */ struct rvt_driver_provided driver_f; struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ int n_ahs_allocated; spinlock_t n_ahs_lock; /* Protect ah allocated count */ u32 n_srqs_allocated; spinlock_t n_srqs_lock; /* Protect srqs allocated count */ int flags; struct rvt_ibport **ports; /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ u32 n_rc_qps; /* number of RC QPs allocated for device */ u32 busy_jiffies; /* timeout scaling based on RC QP count */ spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; spinlock_t pending_lock; /* protect pending mmap list */ /* CQ */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ /* Multicast */ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; /* Memory Working Set Size */ struct rvt_wss *wss; }; /** * rvt_set_ibdev_name - Craft an IB device name from client info * @rdi: pointer to the client rvt_dev_info structure * @name: client specific name * @unit: client specific unit number. */ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, const char *fmt, const char *name, const int unit) { /* * FIXME: rvt and its users want to touch the ibdev before * registration and have things like the name work. We don't have the * infrastructure in the core to support this directly today, hack it * to work by setting the name manually here. 
*/ dev_set_name(&rdi->ibdev.dev, fmt, name, unit); strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); } /** * rvt_get_ibdev_name - return the IB name * @rdi: rdmavt device * * Return the registered name of the device. */ static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) { return dev_name(&rdi->ibdev.dev); } static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) { return container_of(ibpd, struct rvt_pd, ibpd); } static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) { return container_of(ibah, struct rvt_ah, ibah); } static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); } static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* * All ports have same number of pkeys. */ return rdi->dparms.npkeys; } /* * Return the max atomic suitable for determining * the size of the ack ring buffer in a QP. */ static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) { return rdi->dparms.max_rdma_atomic + rdi->dparms.extra_rdma_atomic + 1; } static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi) { return rdi->dparms.max_rdma_atomic + rdi->dparms.extra_rdma_atomic; } /* * Return the indexed PKEY from the port PKEY table. 
*/ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, int port_index, unsigned index) { if (index >= rvt_get_npkeys(rdi)) return 0; else return rdi->ports[port_index]->pkey_table[index]; } struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table); int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, int access); int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct rvt_sge *last_sge, struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, u16 lid); #endif /* DEF_RDMA_VT_H */ PK ! �"*- - rdma/ib_hdrs.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2018 Intel Corporation. 
*/ #ifndef IB_HDRS_H #define IB_HDRS_H #include <linux/types.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> #define IB_SEQ_NAK (3 << 29) /* AETH NAK opcode values */ #define IB_RNR_NAK 0x20 #define IB_NAK_PSN_ERROR 0x60 #define IB_NAK_INVALID_REQUEST 0x61 #define IB_NAK_REMOTE_ACCESS_ERROR 0x62 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 #define IB_NAK_INVALID_RD_REQUEST 0x64 #define IB_BTH_REQ_ACK BIT(31) #define IB_BTH_SOLICITED BIT(23) #define IB_BTH_MIG_REQ BIT(22) #define IB_GRH_VERSION 6 #define IB_GRH_VERSION_MASK 0xF #define IB_GRH_VERSION_SHIFT 28 #define IB_GRH_TCLASS_MASK 0xFF #define IB_GRH_TCLASS_SHIFT 20 #define IB_GRH_FLOW_MASK 0xFFFFF #define IB_GRH_FLOW_SHIFT 0 #define IB_GRH_NEXT_HDR 0x1B #define IB_FECN_SHIFT 31 #define IB_FECN_MASK 1 #define IB_FECN_SMASK BIT(IB_FECN_SHIFT) #define IB_BECN_SHIFT 30 #define IB_BECN_MASK 1 #define IB_BECN_SMASK BIT(IB_BECN_SHIFT) #define IB_AETH_CREDIT_SHIFT 24 #define IB_AETH_CREDIT_MASK 0x1F #define IB_AETH_CREDIT_INVAL 0x1F #define IB_AETH_NAK_SHIFT 29 #define IB_MSN_MASK 0xFFFFFF struct ib_reth { __be64 vaddr; /* potentially unaligned */ __be32 rkey; __be32 length; } __packed; struct ib_atomic_eth { __be64 vaddr; /* potentially unaligned */ __be32 rkey; __be64 swap_data; /* potentially unaligned */ __be64 compare_data; /* potentially unaligned */ } __packed; #include <rdma/tid_rdma_defs.h> union ib_ehdrs { struct { __be32 deth[2]; __be32 imm_data; } ud; struct { struct ib_reth reth; __be32 imm_data; } rc; struct { __be32 aeth; __be64 atomic_ack_eth; /* potentially unaligned */ } __packed at; __be32 imm_data; __be32 aeth; __be32 ieth; struct ib_atomic_eth atomic_eth; /* TID RDMA headers */ union { struct tid_rdma_read_req r_req; struct tid_rdma_read_resp r_rsp; struct tid_rdma_write_req w_req; struct tid_rdma_write_resp w_rsp; struct tid_rdma_write_data w_data; struct tid_rdma_resync resync; struct tid_rdma_ack ack; } tid_rdma; } __packed; struct ib_other_headers { __be32 bth[3]; union ib_ehdrs u; } 
__packed; struct ib_header { __be16 lrh[4]; union { struct { struct ib_grh grh; struct ib_other_headers oth; } l; struct ib_other_headers oth; } u; } __packed; /* accessors for unaligned __be64 items */ static inline u64 ib_u64_get(__be64 *p) { return get_unaligned_be64(p); } static inline void ib_u64_put(u64 val, __be64 *p) { put_unaligned_be64(val, p); } static inline u64 get_ib_reth_vaddr(struct ib_reth *reth) { return ib_u64_get(&reth->vaddr); } static inline void put_ib_reth_vaddr(u64 val, struct ib_reth *reth) { ib_u64_put(val, &reth->vaddr); } static inline u64 get_ib_ateth_vaddr(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->vaddr); } static inline void put_ib_ateth_vaddr(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->vaddr); } static inline u64 get_ib_ateth_swap(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->swap_data); } static inline void put_ib_ateth_swap(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->swap_data); } static inline u64 get_ib_ateth_compare(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->compare_data); } static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->compare_data); } /* * 9B/IB Packet Format */ #define IB_LNH_MASK 3 #define IB_SC_MASK 0xf #define IB_SC_SHIFT 12 #define IB_SC5_MASK 0x10 #define IB_SL_MASK 0xf #define IB_SL_SHIFT 4 #define IB_SL_SHIFT 4 #define IB_LVER_MASK 0xf #define IB_LVER_SHIFT 8 static inline u8 ib_get_lnh(struct ib_header *hdr) { return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK); } static inline u8 ib_get_sc(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); } static inline bool ib_is_sc5(u16 sc5) { return !!(sc5 & IB_SC5_MASK); } static inline u8 ib_get_sl(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); } static inline u16 ib_get_dlid(struct ib_header *hdr) { return (be16_to_cpu(hdr->lrh[1])); } static inline u16 
ib_get_slid(struct ib_header *hdr) { return (be16_to_cpu(hdr->lrh[3])); } static inline u8 ib_get_lver(struct ib_header *hdr) { return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & IB_LVER_MASK); } static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) { return be32_to_cpu(ohdr->u.ud.deth[0]); } static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) { return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK); } /* * BTH */ #define IB_BTH_OPCODE_MASK 0xff #define IB_BTH_OPCODE_SHIFT 24 #define IB_BTH_PAD_MASK 3 #define IB_BTH_PKEY_MASK 0xffff #define IB_BTH_PAD_SHIFT 20 #define IB_BTH_A_MASK 1 #define IB_BTH_A_SHIFT 31 #define IB_BTH_M_MASK 1 #define IB_BTH_M_SHIFT 22 #define IB_BTH_SE_MASK 1 #define IB_BTH_SE_SHIFT 23 #define IB_BTH_TVER_MASK 0xf #define IB_BTH_TVER_SHIFT 16 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) & IB_BTH_PAD_MASK); } static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr) { return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK); } static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) { return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) & IB_BTH_OPCODE_MASK); } static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr) { return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & IB_BTH_A_MASK); } static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr) { return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & IB_BTH_M_MASK); } static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr) { return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & IB_BTH_SE_MASK); } static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr) { return (u32)(be32_to_cpu(ohdr->bth[2])); } static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr) { return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK); } static inline bool ib_bth_get_becn(struct ib_other_headers *ohdr) { return (ohdr->bth[1]) & 
cpu_to_be32(IB_BECN_SMASK); } static inline bool ib_bth_get_fecn(struct ib_other_headers *ohdr) { return (ohdr->bth[1]) & cpu_to_be32(IB_FECN_SMASK); } static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr) { return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) & IB_BTH_TVER_MASK); } static inline bool ib_bth_is_solicited(struct ib_other_headers *ohdr) { return ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED); } static inline bool ib_bth_is_migration(struct ib_other_headers *ohdr) { return ohdr->bth[0] & cpu_to_be32(IB_BTH_MIG_REQ); } #endif /* IB_HDRS_H */ PK ! 7��p p rdma/rdmavt_qp.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2016 - 2020 Intel Corporation. */ #ifndef DEF_RDMAVT_INCQP_H #define DEF_RDMAVT_INCQP_H #include <rdma/rdma_vt.h> #include <rdma/ib_pack.h> #include <rdma/ib_verbs.h> #include <rdma/rdmavt_cq.h> #include <rdma/rvt-abi.h> /* * Atomic bit definitions for r_aflags. */ #define RVT_R_WRID_VALID 0 #define RVT_R_REWIND_SGE 1 /* * Bit definitions for r_flags. */ #define RVT_R_REUSE_SGE 0x01 #define RVT_R_RDMAR_SEQ 0x02 #define RVT_R_RSP_NAK 0x04 #define RVT_R_RSP_SEND 0x08 #define RVT_R_COMM_EST 0x10 /* * If a packet's QP[23:16] bits match this value, then it is * a PSM packet and the hardware will expect a KDETH header * following the BTH. */ #define RVT_KDETH_QP_PREFIX 0x80 #define RVT_KDETH_QP_SUFFIX 0xffff #define RVT_KDETH_QP_PREFIX_MASK 0x00ff0000 #define RVT_KDETH_QP_PREFIX_SHIFT 16 #define RVT_KDETH_QP_BASE (u32)(RVT_KDETH_QP_PREFIX << \ RVT_KDETH_QP_PREFIX_SHIFT) #define RVT_KDETH_QP_MAX (u32)(RVT_KDETH_QP_BASE + RVT_KDETH_QP_SUFFIX) /* * If a packet's LNH == BTH and DEST QPN[23:16] in the BTH match this * prefix value, then it is an AIP packet with a DETH containing the entropy * value in byte 4 following the BTH. 
*/
#define RVT_AIP_QP_PREFIX       0x81
#define RVT_AIP_QP_SUFFIX       0xffff
#define RVT_AIP_QP_PREFIX_MASK  0x00ff0000
#define RVT_AIP_QP_PREFIX_SHIFT 16
#define RVT_AIP_QP_BASE         (u32)(RVT_AIP_QP_PREFIX << \
				      RVT_AIP_QP_PREFIX_SHIFT)
/*
 * RVT_AIP_QPN_MAX is the number of QPNs that share the AIP prefix;
 * RVT_AIP_QP_MAX is therefore the last QPN of that range.
 */
#define RVT_AIP_QPN_MAX         BIT(RVT_AIP_QP_PREFIX_SHIFT)
#define RVT_AIP_QP_MAX          (u32)(RVT_AIP_QP_BASE + RVT_AIP_QPN_MAX - 1)

/*
 * Bit definitions for s_flags.
 *
 * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
 * RVT_S_BUSY - send tasklet is processing the QP
 * RVT_S_TIMER - the RC retry timer is active
 * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
 * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
 *                    before processing the next SWQE
 * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
 *                    before processing the next SWQE
 * RVT_S_WAIT_RNR - waiting for RNR timeout
 * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
 * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
 *                  next send completion entry not via send DMA
 * RVT_S_WAIT_PIO - waiting for a send buffer to be available
 * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
 * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
 * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
 * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
 * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
 * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
 * RVT_S_ECN - a BECN was queued to the send engine
 * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
 *
 * NOTE(review): RVT_S_RESP_PENDING and RVT_S_UNLIMITED_CREDIT are defined
 * below but are not described in this list -- TODO document them.
 */
#define RVT_S_SIGNAL_REQ_WR	0x0001
#define RVT_S_BUSY		0x0002
#define RVT_S_TIMER		0x0004
#define RVT_S_RESP_PENDING	0x0008
#define RVT_S_ACK_PENDING	0x0010
#define RVT_S_WAIT_FENCE	0x0020
#define RVT_S_WAIT_RDMAR	0x0040
#define RVT_S_WAIT_RNR		0x0080
#define RVT_S_WAIT_SSN_CREDIT	0x0100
#define RVT_S_WAIT_DMA		0x0200
#define RVT_S_WAIT_PIO		0x0400
#define RVT_S_WAIT_TX		0x0800
#define RVT_S_WAIT_DMA_DESC	0x1000
#define RVT_S_WAIT_KMEM		0x2000
#define RVT_S_WAIT_PSN		0x4000
#define RVT_S_WAIT_ACK		0x8000
#define RVT_S_SEND_ONE		0x10000
#define RVT_S_UNLIMITED_CREDIT	0x20000
#define RVT_S_ECN		0x40000
#define RVT_S_MAX_BIT_MASK	0x800000

/*
 * Drivers should use s_flags starting with bit 31 down to the bit next to
 * RVT_S_MAX_BIT_MASK
 */

/*
 * Wait flags that would prevent any packet type from being sent.
 */
#define RVT_S_ANY_WAIT_IO \
	(RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
	 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)

/*
 * Wait flags that would prevent send work requests from making progress.
 */
#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \
	RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \
	RVT_S_WAIT_PSN | RVT_S_WAIT_ACK)

/* Union of every wait flag defined above (I/O waits and send waits) */
#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)

/* Mask of the opcode bits to pay attention to when checking the qp type */
#define RVT_OPCODE_QP_MASK 0xE0

/* Flags for checking QP state (see ib_rvt_state_ops[]) */
#define RVT_POST_SEND_OK                0x01
#define RVT_POST_RECV_OK                0x02
#define RVT_PROCESS_RECV_OK             0x04
#define RVT_PROCESS_SEND_OK             0x08
#define RVT_PROCESS_NEXT_SEND_OK        0x10
#define RVT_FLUSH_SEND			0x20
#define RVT_FLUSH_RECV			0x40
#define RVT_PROCESS_OR_FLUSH_SEND \
	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
#define RVT_SEND_OR_FLUSH_OR_RECV_OK \
	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND | RVT_PROCESS_RECV_OK)

/*
 * Internal send flags
 */
#define RVT_SEND_RESERVE_USED           IB_SEND_RESERVED_START
#define RVT_SEND_COMPLETION_ONLY	(IB_SEND_RESERVED_START << 1)

/**
 * struct rvt_ud_wr - IB UD work plus AH cache
 * @wr: valid IB work request
 * @attr: pointer to an allocated AH attribute
 *
 * Special case the UD WR so we can keep track of the AH attributes.
 *
 * NOTE: This data structure is strictly ordered wr then attr. I.e. the attr
 * MUST come after wr.  The ib_ud_wr is sized and copied in rvt_post_one_wr.
 * The copy assumes that wr is first.
 */
struct rvt_ud_wr {
	struct ib_ud_wr wr;
	struct rdma_ah_attr *attr;
};

/*
 * Send work request queue entry.
 * The size of the sg_list is determined when the QP is created and stored
 * in qp->s_max_sge.
 */
struct rvt_swqe {
	union {
		struct ib_send_wr wr;   /* don't use wr.sg_list */
		struct rvt_ud_wr ud_wr;
		struct ib_reg_wr reg_wr;
		struct ib_rdma_wr rdma_wr;
		struct ib_atomic_wr atomic_wr;
	};
	u32 psn;                /* first packet sequence number */
	u32 lpsn;               /* last packet sequence number */
	u32 ssn;                /* send sequence number */
	u32 length;             /* total length of data in sg_list */
	void *priv;             /* driver dependent field */
	struct rvt_sge sg_list[];
};

/**
 * struct rvt_krwq - kernel struct receive work request
 * @p_lock: lock to protect producer of the kernel buffer
 * @head: index of next entry to fill
 * @c_lock: lock to protect consumer of the kernel buffer
 * @tail: index of next entry to pull
 * @count: approximate count of total receive entries posted
 * @curr_wq: base pointer used to index receive work queue entries
 *           (see rvt_get_rwqe_ptr())
 * @wq: trailing array of receive work queue entries
 *
 * This structure is used to contain the head pointer,
 * tail pointer and receive work queue entries for kernel
 * mode user.
 */
struct rvt_krwq {
	spinlock_t p_lock;	/* protect producer */
	u32 head;               /* new work requests posted to the head */

	/* protect consumer */
	spinlock_t c_lock ____cacheline_aligned_in_smp;
	u32 tail;               /* receives pull requests from here. */
	u32 count;		/* approx count of receive entries posted */
	struct rvt_rwqe *curr_wq;
	struct rvt_rwqe wq[];
};

/**
 * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah
 * @swqe: valid Send WQE
 *
 * Reads the UD view (ud_wr) of the SWQE union.
 */
static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe)
{
	return ibah_to_rvtah(swqe->ud_wr.wr.ah);
}

/**
 * rvt_get_swqe_ah_attr - Return the cached ah attribute information
 * @swqe: valid Send WQE
 *
 */
static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe)
{
	return swqe->ud_wr.attr;
}

/**
 * rvt_get_swqe_remote_qpn - Access the remote QPN value
 * @swqe: valid Send WQE
 *
 */
static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe)
{
	return swqe->ud_wr.wr.remote_qpn;
}

/**
 * rvt_get_swqe_remote_qkey - Access the remote qkey value
 * @swqe: valid Send WQE
 *
 */
static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe)
{
	return swqe->ud_wr.wr.remote_qkey;
}

/**
 * rvt_get_swqe_pkey_index - Access the pkey index
 * @swqe: valid Send WQE
 *
 */
static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe)
{
	return swqe->ud_wr.wr.pkey_index;
}

struct rvt_rq {
	struct rvt_rwq *wq;
	struct rvt_krwq *kwq;
	u32 size;               /* size of RWQE array */
	u8 max_sge;
	/* protect changes in this struct */
	spinlock_t lock ____cacheline_aligned_in_smp;
};

/**
 * rvt_get_rq_count - count numbers of request work queue entries
 * in circular buffer
 * @rq: data structure for request queue entry
 * @head: head indices of the circular buffer
 * @tail: tail indices of the circular buffer
 *
 * Return - total number of entries in the Receive Queue
 */
static inline u32 rvt_get_rq_count(struct rvt_rq *rq, u32 head, u32 tail)
{
	u32 count = head - tail;

	/*
	 * head - tail wraps as u32; a negative value when viewed as s32
	 * means head is behind tail, so add the ring size to correct it.
	 */
	if ((s32)count < 0)
		count += rq->size;
	return count;
}

/*
 * This structure holds the information that the send tasklet needs
 * to send a RDMA read response or atomic operation.
*/
struct rvt_ack_entry {
	struct rvt_sge rdma_sge;
	u64 atomic_data;
	u32 psn;
	u32 lpsn;
	u8 opcode;
	u8 sent;
	void *priv;
};

#define	RC_QP_SCALING_INTERVAL	5

/* Values for the flags member of struct rvt_operation_params */
#define RVT_OPERATION_PRIV        0x00000001
#define RVT_OPERATION_ATOMIC      0x00000002
#define RVT_OPERATION_ATOMIC_SGE  0x00000004
#define RVT_OPERATION_LOCAL       0x00000008
#define RVT_OPERATION_USE_RESERVE 0x00000010
#define RVT_OPERATION_IGN_RNR_CNT 0x00000020

#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)

/**
 * struct rvt_operation_params - op table entry
 * @length: the length to copy into the swqe entry
 * @qpt_support: a bit mask indicating QP type support
 * @flags: RVT_OPERATION flags (see above)
 *
 * This supports table driven post send so that
 * the driver can have differing and potentially
 * different sets of operations.
 */
struct rvt_operation_params {
	size_t length;
	u32 qpt_support;
	u32 flags;
};

/*
 * Common variables are protected by both r_rq.lock and s_lock in that order
 * which only happens in modify_qp() or changing the QP 'state'.
 */
struct rvt_qp {
	struct ib_qp ibqp;
	void *priv; /* Driver private data */
	/* read mostly fields above and below */
	struct rdma_ah_attr remote_ah_attr;
	struct rdma_ah_attr alt_ah_attr;
	struct rvt_qp __rcu *next;           /* link list for QPN hash table */
	struct rvt_swqe *s_wq;  /* send work queue */
	struct rvt_mmap_info *ip;

	unsigned long timeout_jiffies;  /* computed from timeout */

	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
	pid_t pid;		/* pid for user mode QPs */
	u32 remote_qpn;
	u32 qkey;               /* QKEY for this QP (for UD or RD) */
	u32 s_size;             /* send work queue size */

	u16 pmtu;		/* decoded from path_mtu */
	u8 log_pmtu;		/* shift for pmtu */
	u8 state;               /* QP state */
	u8 allowed_ops;		/* high order bits of allowed opcodes */
	u8 qp_access_flags;
	u8 alt_timeout;         /* Alternate path timeout for this QP */
	u8 timeout;             /* Timeout for this QP */
	u8 s_srate;
	u8 s_mig_state;
	u8 port_num;
	u8 s_pkey_index;        /* PKEY index to use */
	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
	u8 s_retry_cnt;         /* number of times to retry */
	u8 s_rnr_retry_cnt;
	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
	u8 s_max_sge;           /* size of s_wq->sg_list */
	u8 s_draining;

	/* start of read/write fields */
	atomic_t refcount ____cacheline_aligned_in_smp;
	wait_queue_head_t wait;

	struct rvt_ack_entry *s_ack_queue;
	struct rvt_sge_state s_rdma_read_sge;

	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
	u32 r_psn;              /* expected rcv packet sequence number */
	unsigned long r_aflags;
	u64 r_wr_id;            /* ID for current receive WQE */
	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
	u32 r_len;              /* total length of r_sge */
	u32 r_rcv_len;          /* receive data len processed */
	u32 r_msn;              /* message sequence number */

	u8 r_state;             /* opcode of last packet received */
	u8 r_flags;
	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
	u8 r_adefered;          /* deferred ack count */

	struct list_head rspwait;       /* link for waiting to respond */

	struct rvt_sge_state r_sge;     /* current receive data */
	struct rvt_rq r_rq;             /* receive work queue */

	/* post send line */
	spinlock_t s_hlock ____cacheline_aligned_in_smp;
	u32 s_head;             /* new entries added here */
	u32 s_next_psn;         /* PSN for next request */
	u32 s_avail;            /* number of entries avail */
	u32 s_ssn;              /* SSN of tail entry */
	atomic_t s_reserved_used; /* reserved entries in use */

	spinlock_t s_lock ____cacheline_aligned_in_smp;
	u32 s_flags;
	struct rvt_sge_state *s_cur_sge;
	struct rvt_swqe *s_wqe;
	struct rvt_sge_state s_sge;     /* current send request data */
	struct rvt_mregion *s_rdma_mr;
	u32 s_len;              /* total length of s_sge */
	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
	u32 s_last_psn;         /* last response PSN processed */
	u32 s_sending_psn;      /* lowest PSN that is being sent */
	u32 s_sending_hpsn;     /* highest PSN that is being sent */
	u32 s_psn;              /* current packet sequence number */
	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
	u32 s_tail;             /* next entry to process */
	u32 s_cur;              /* current work queue entry */
	u32 s_acked;            /* last un-ACK'ed entry */
	u32 s_last;             /* last completed entry */
	u32 s_lsn;              /* limit sequence number (credit) */
	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
	u16 s_cur_size;         /* size of send packet in bytes */
	u16 s_rdma_ack_cnt;
	u8 s_hdrwords;         /* size of s_hdr in 32 bit words */
	s8 s_ahgidx;
	u8 s_state;             /* opcode of last packet sent */
	u8 s_ack_state;         /* opcode of packet to ACK */
	u8 s_nak_state;         /* non-zero if NAK is pending */
	u8 r_nak_state;         /* non-zero if NAK is pending */
	u8 s_retry;             /* requester retry counter */
	u8 s_rnr_retry;         /* requester RNR retry counter */
	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
	u8 s_acked_ack_queue;   /* index into s_ack_queue[] */

	struct rvt_sge_state s_ack_rdma_sge;
	struct timer_list s_timer;
	struct hrtimer s_rnr_timer;

	atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */

	/*
	 * This sge list MUST be last. Do not add anything below here.
	 */
	struct rvt_sge *r_sg_list /* verified SGEs */
		____cacheline_aligned_in_smp;
};

struct rvt_srq {
	struct ib_srq ibsrq;
	struct rvt_rq rq;
	struct rvt_mmap_info *ip;
	/* send signal when number of RWQEs < limit */
	u32 limit;
};

/* Map an embedded ib_srq back to its containing rvt_srq */
static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
{
	return container_of(ibsrq, struct rvt_srq, ibsrq);
}

/* Map an embedded ib_qp back to its containing rvt_qp */
static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct rvt_qp, ibqp);
}

#define RVT_QPN_MAX                 BIT(24)
/* Number of page-sized bitmaps needed to hold one bit per possible QPN */
#define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
#define RVT_QPN_MASK		    IB_QPN_MASK

/*
 * QPN-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated.
This way, * large bitmaps are not allocated unless large numbers of QPs are used. */ struct rvt_qpn_map { void *page; }; struct rvt_qpn_table { spinlock_t lock; /* protect changes to the qp table */ unsigned flags; /* flags for QP0/1 allocated for each port */ u32 last; /* last QP number allocated */ u32 nmaps; /* size of the map table */ u16 limit; u8 incr; /* bit map of free QP numbers other than 0/1 */ struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; }; struct rvt_qp_ibdev { u32 qp_table_size; u32 qp_table_bits; struct rvt_qp __rcu **qp_table; spinlock_t qpt_lock; /* qptable lock */ struct rvt_qpn_table qpn_table; }; /* * There is one struct rvt_mcast for each multicast GID. * All attached QPs are then stored as a list of * struct rvt_mcast_qp. */ struct rvt_mcast_qp { struct list_head list; struct rvt_qp *qp; }; struct rvt_mcast_addr { union ib_gid mgid; u16 lid; }; struct rvt_mcast { struct rb_node rb_node; struct rvt_mcast_addr mcast_addr; struct list_head qp_list; wait_queue_head_t wait; atomic_t refcount; int n_attached; }; /* * Since struct rvt_swqe is not a fixed size, we can't simply index into * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) { return (struct rvt_swqe *)((char *)qp->s_wq + (sizeof(struct rvt_swqe) + qp->s_max_sge * sizeof(struct rvt_sge)) * n); } /* * Since struct rvt_rwqe is not a fixed size, we can't simply index into * struct rvt_rwq.wq. This function does the array index computation. 
*/ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) { return (struct rvt_rwqe *) ((char *)rq->kwq->curr_wq + (sizeof(struct rvt_rwqe) + rq->max_sge * sizeof(struct ib_sge)) * n); } /** * rvt_is_user_qp - return if this is user mode QP * @qp - the target QP */ static inline bool rvt_is_user_qp(struct rvt_qp *qp) { return !!qp->pid; } /** * rvt_get_qp - get a QP reference * @qp - the QP to hold */ static inline void rvt_get_qp(struct rvt_qp *qp) { atomic_inc(&qp->refcount); } /** * rvt_put_qp - release a QP reference * @qp - the QP to release */ static inline void rvt_put_qp(struct rvt_qp *qp) { if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } /** * rvt_put_swqe - drop mr refs held by swqe * @wqe - the send wqe * * This drops any mr references held by the swqe */ static inline void rvt_put_swqe(struct rvt_swqe *wqe) { int i; for (i = 0; i < wqe->wr.num_sge; i++) { struct rvt_sge *sge = &wqe->sg_list[i]; rvt_put_mr(sge->mr); } } /** * rvt_qp_wqe_reserve - reserve operation * @qp - the rvt qp * @wqe - the send wqe * * This routine used in post send to record * a wqe relative reserved operation use. */ static inline void rvt_qp_wqe_reserve( struct rvt_qp *qp, struct rvt_swqe *wqe) { atomic_inc(&qp->s_reserved_used); } /** * rvt_qp_wqe_unreserve - clean reserved operation * @qp - the rvt qp * @flags - send wqe flags * * This decrements the reserve use count. * * This call MUST precede the change to * s_last to insure that post send sees a stable * s_avail. * * An smp_mp__after_atomic() is used to insure * the compiler does not juggle the order of the s_last * ring index and the decrementing of s_reserved_used. 
*/ static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags) { if (unlikely(flags & RVT_SEND_RESERVE_USED)) { atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); } } extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; /* * Compare the lower 24 bits of the msn values. * Returns an integer <, ==, or > than zero. */ static inline int rvt_cmp_msn(u32 a, u32 b) { return (((int)a) - ((int)b)) << 8; } __be32 rvt_compute_aeth(struct rvt_qp *qp); void rvt_get_credit(struct rvt_qp *qp, u32 aeth); u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len); /** * rvt_div_round_up_mtu - round up divide * @qp - the qp pair * @len - the length * * Perform a shift based mtu round up divide */ static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len) { return (len + qp->pmtu - 1) >> qp->log_pmtu; } /** * @qp - the qp pair * @len - the length * * Perform a shift based mtu divide */ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len) { return len >> qp->log_pmtu; } /** * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies * @timeout - timeout input(0 - 31). * * Return a timeout value in jiffies. */ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) { if (timeout > 31) timeout = 31; return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL; } /** * rvt_lookup_qpn - return the QP with the given QPN * @ibp: the ibport * @qpn: the QP number to look up * * The caller must hold the rcu_read_lock(), and keep the lock until * the returned qp is no longer in use. 
*/ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, u32 qpn) __must_hold(RCU) { struct rvt_qp *qp = NULL; if (unlikely(qpn <= 1)) { qp = rcu_dereference(rvp->qp[qpn]); } else { u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) break; } return qp; } /** * rvt_mod_retry_timer - mod a retry timer * @qp - the QP * @shift - timeout shift to wait for multiple packets * Modify a potentially already running retry timer */ static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) { struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + (qp->timeout_jiffies << shift)); } static inline void rvt_mod_retry_timer(struct rvt_qp *qp) { return rvt_mod_retry_timer_ext(qp, 0); } /** * rvt_put_qp_swqe - drop refs held by swqe * @qp: the send qp * @wqe: the send wqe * * This drops any references held by the swqe */ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { rvt_put_swqe(wqe); if (qp->allowed_ops == IB_OPCODE_UD) rdma_destroy_ah_attr(wqe->ud_wr.attr); } /** * rvt_qp_sqwe_incr - increment ring index * @qp: the qp * @val: the starting value * * Return: the new value wrapping as appropriate */ static inline u32 rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val) { if (++val >= qp->s_size) val = 0; return val; } int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); /** * rvt_recv_cq - add a new entry to completion queue * by receive queue * @qp: receive queue * @wc: work completion entry to add * @solicited: true if @entry is solicited * * This is wrapper function for rvt_enter_cq function call by * receive queue. If rvt_cq_enter return false, it means cq is * full and the qp is put into error state. 
*/ static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc, bool solicited) { struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq); /* a failed enqueue moves the QP to error with IB_WC_LOC_QP_OP_ERR */ if (unlikely(!rvt_cq_enter(cq, wc, solicited))) rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR); } /** * rvt_send_cq - add a new entry to completion queue * by send queue * @qp: send queue * @wc: work completion entry to add * @solicited: true if @wc is solicited * * This is a wrapper for the rvt_cq_enter() call made by the * send queue. If rvt_cq_enter() returns false, it means cq is * full and the qp is put into error state. */ static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc, bool solicited) { struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq); /* a failed enqueue moves the QP to error with IB_WC_LOC_QP_OP_ERR */ if (unlikely(!rvt_cq_enter(cq, wc, solicited))) rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR); } /** * rvt_qp_complete_swqe - insert send completion * @qp: the qp * @wqe: the send wqe * @opcode: wc operation (driver dependent) * @status: completion status * * Update the s_last information, and then insert a send * completion into the completion * queue if the qp indicates it should be done. * * See IBTA 10.7.3.1 for info on completion * control. 
* * Return: the new s_last ring index */ static inline u32 rvt_qp_complete_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_opcode opcode, enum ib_wc_status status) { bool need_completion; u64 wr_id; u32 byte_len, last; int flags = wqe->wr.send_flags; rvt_qp_wqe_unreserve(qp, flags); rvt_put_qp_swqe(qp, wqe); /* * A completion is reported only when flags lack RVT_SEND_RESERVE_USED * and at least one of these holds: the QP lacks RVT_S_SIGNAL_REQ_WR, * the wqe has IB_SEND_SIGNALED set, or status is not IB_WC_SUCCESS. */ need_completion = !(flags & RVT_SEND_RESERVE_USED) && (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || (flags & IB_SEND_SIGNALED) || status != IB_WC_SUCCESS); if (need_completion) { wr_id = wqe->wr.wr_id; byte_len = wqe->length; /* above fields required before writing s_last */ } last = rvt_qp_swqe_incr(qp, qp->s_last); /* see rvt_qp_is_avail() */ smp_store_release(&qp->s_last, last); if (need_completion) { struct ib_wc w = { .wr_id = wr_id, .status = status, .opcode = opcode, .qp = &qp->ibqp, .byte_len = byte_len, }; /* the entry is flagged solicited only when status is a failure */ rvt_send_cq(qp, &w, status != IB_WC_SUCCESS); } return last; } extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only); void rvt_comm_est(struct rvt_qp *qp); void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err); unsigned long rvt_rnr_tbl_to_usec(u32 index); enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t); void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth); void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp); void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift); /* Convenience wrapper: rvt_add_retry_timer_ext() with a shift of 0. */ static inline void rvt_add_retry_timer(struct rvt_qp *qp) { rvt_add_retry_timer_ext(qp, 0); } void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, void *data, u32 length, bool release, bool copy_last); void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); void rvt_ruc_loopback(struct rvt_qp *qp); /** * struct rvt_qp_iter - the iterator for QPs * @qp: the current QP * * This structure defines the current iterator * state for sequenced access to all QPs relative * to an rvt_dev_info. 
*/ struct rvt_qp_iter { struct rvt_qp *qp; /* private: backpointer */ struct rvt_dev_info *rdi; /* private: callback routine */ void (*cb)(struct rvt_qp *qp, u64 v); /* private: for arg to callback routine */ u64 v; /* private: number of SMI,GSI QPs for device */ int specials; /* private: current iterator index */ int n; }; /** * ib_cq_tail - Return tail index of cq buffer * @send_cq: The cq for send * * This is called in qp_iter_print to get tail * of cq buffer. When cq->ip is set the tail is read from cq->queue * through RDMA_READ_UAPI_ATOMIC(); otherwise cq->kqueue->tail is returned. */ static inline u32 ib_cq_tail(struct ib_cq *send_cq) { /* convert once and reuse the result for every access below */ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq); return cq->ip ? RDMA_READ_UAPI_ATOMIC(cq->queue->tail) : cq->kqueue->tail; } /** * ib_cq_head - Return head index of cq buffer * @send_cq: The cq for send * * This is called in qp_iter_print to get head * of cq buffer. When cq->ip is set the head is read from cq->queue * through RDMA_READ_UAPI_ATOMIC(); otherwise cq->kqueue->head is returned. */ static inline u32 ib_cq_head(struct ib_cq *send_cq) { /* convert once and reuse the result for every access below */ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq); return cq->ip ? RDMA_READ_UAPI_ATOMIC(cq->queue->head) : cq->kqueue->head; } /** * rvt_free_rq - free memory allocated for rvt_rq struct * @rq: request queue data structure * * This function should only be called if the rvt_mmap_info() * has not succeeded. It releases rq->kwq with kvfree() and rq->wq with * vfree(), and resets both pointers to NULL. */ static inline void rvt_free_rq(struct rvt_rq *rq) { kvfree(rq->kwq); rq->kwq = NULL; vfree(rq->wq); rq->wq = NULL; } /** * rvt_to_iport - Get the ibport pointer * @qp: the qp pointer * * This function returns the ibport pointer from the qp pointer, taken * from the ports[] array of the qp's rvt device at index qp->port_num - 1. */ static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp) { struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); return rdi->ports[qp->port_num - 1]; } /** * rvt_rc_credit_avail - Check if there are enough RC credits for the request * @qp: the qp * @wqe: the request * * This function returns false when there are not enough credits for the given * request and true otherwise. 
*/ static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe) { lockdep_assert_held(&qp->s_lock); if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { struct rvt_ibport *rvp = rvt_to_iport(qp); qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; rvp->n_rc_crwaits++; return false; } return true; } struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); int rvt_qp_iter_next(struct rvt_qp_iter *iter); void rvt_qp_iter(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey); #endif /* DEF_RDMAVT_INCQP_H */ PK ! x�`� � rdma/iw_portmap.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2014 Intel Corporation. All rights reserved. * Copyright (c) 2014 Chelsio, Inc. All rights reserved. */ #ifndef _IW_PORTMAP_H #define _IW_PORTMAP_H #include <linux/socket.h> #include <linux/netlink.h> #define IWPM_ULIBNAME_SIZE 32 #define IWPM_DEVNAME_SIZE 32 #define IWPM_IFNAME_SIZE 16 #define IWPM_IPADDR_SIZE 16 enum { IWPM_INVALID_NLMSG_ERR = 10, IWPM_CREATE_MAPPING_ERR, IWPM_DUPLICATE_MAPPING_ERR, IWPM_UNKNOWN_MAPPING_ERR, IWPM_CLIENT_DEV_INFO_ERR, IWPM_USER_LIB_INFO_ERR, IWPM_REMOTE_QUERY_REJECT }; struct iwpm_dev_data { char dev_name[IWPM_DEVNAME_SIZE]; char if_name[IWPM_IFNAME_SIZE]; }; struct iwpm_sa_data { struct sockaddr_storage loc_addr; struct sockaddr_storage mapped_loc_addr; struct sockaddr_storage rem_addr; struct sockaddr_storage mapped_rem_addr; u32 flags; }; int iwpm_init(u8); int iwpm_exit(u8); int iwpm_valid_pid(void); int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client); int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client); int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *); int 
iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *); int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *); int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, struct sockaddr_storage *mapped_rem_addr, struct sockaddr_storage *remote_addr, u8 nl_client); int iwpm_create_mapinfo(struct sockaddr_storage *local_addr, struct sockaddr_storage *mapped_addr, u8 nl_client, u32 map_flags); int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr, struct sockaddr_storage *mapped_addr); int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _IW_PORTMAP_H */ PK ! O$�~ rdma/lag.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2020 Mellanox Technologies. All rights reserved. */ #ifndef _RDMA_LAG_H_ #define _RDMA_LAG_H_ #include <net/lag.h> struct ib_device; struct rdma_ah_attr; enum rdma_lag_flags { RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 }; void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave); struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, struct rdma_ah_attr *ah_attr, gfp_t flags); #endif /* _RDMA_LAG_H_ */ PK ! �L�cw5 w5 kunit/assert.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /* * Assertion and expectation serialization API. * * Copyright (C) 2019, Google LLC. * Author: Brendan Higgins <brendanhiggins@google.com> */ #ifndef _KUNIT_ASSERT_H #define _KUNIT_ASSERT_H #include <linux/err.h> #include <linux/kernel.h> struct kunit; struct string_stream; /** * enum kunit_assert_type - Type of expectation/assertion. * @KUNIT_ASSERTION: Used to denote that a kunit_assert represents an assertion. 
* @KUNIT_EXPECTATION: Denotes that a kunit_assert represents an expectation. * * Used in conjunction with a &struct kunit_assert to denote whether it * represents an expectation or an assertion. */ enum kunit_assert_type { KUNIT_ASSERTION, KUNIT_EXPECTATION, }; /** * struct kunit_assert - Data for printing a failed assertion or expectation. * @test: the test case this expectation/assertion is associated with. * @type: the type (either an expectation or an assertion) of this kunit_assert. * @line: the source code line number that the expectation/assertion is at. * @file: the file path of the source file that the expectation/assertion is in. * @message: an optional message to provide additional context. * @format: a function which formats the data in this kunit_assert to a string. * * Represents a failed expectation/assertion. Contains all the data necessary to * format a string to a user reporting the failure. */ struct kunit_assert { struct kunit *test; enum kunit_assert_type type; int line; const char *file; struct va_format message; void (*format)(const struct kunit_assert *assert, struct string_stream *stream); }; /** * KUNIT_INIT_VA_FMT_NULL - Default initializer for struct va_format. * * Used inside a struct initialization block to initialize struct va_format to * default values where fmt and va are null. */ #define KUNIT_INIT_VA_FMT_NULL { .fmt = NULL, .va = NULL } /** * KUNIT_INIT_ASSERT_STRUCT() - Initializer for a &struct kunit_assert. * @kunit: The test case that this expectation/assertion is associated with. * @assert_type: The type (assertion or expectation) of this kunit_assert. * @fmt: The formatting function which builds a string out of this kunit_assert. * * The base initializer for a &struct kunit_assert. 
*/ #define KUNIT_INIT_ASSERT_STRUCT(kunit, assert_type, fmt) { \ .test = kunit, \ .type = assert_type, \ .file = __FILE__, \ .line = __LINE__, \ .message = KUNIT_INIT_VA_FMT_NULL, \ .format = fmt \ } void kunit_base_assert_format(const struct kunit_assert *assert, struct string_stream *stream); void kunit_assert_print_msg(const struct kunit_assert *assert, struct string_stream *stream); /** * struct kunit_fail_assert - Represents a plain fail expectation/assertion. * @assert: The parent of this type. * * Represents a simple KUNIT_FAIL/KUNIT_ASSERT_FAILURE that always fails. */ struct kunit_fail_assert { struct kunit_assert assert; }; void kunit_fail_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_FAIL_ASSERT_STRUCT() - Initializer for &struct kunit_fail_assert. * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * * Initializes a &struct kunit_fail_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_FAIL_ASSERT_STRUCT(test, type) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_fail_assert_format) \ } /** * struct kunit_unary_assert - Represents a KUNIT_{EXPECT|ASSERT}_{TRUE|FALSE} * @assert: The parent of this type. * @condition: A string representation of a conditional expression. * @expected_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise. * * Represents a simple expectation or assertion that simply asserts something is * true or false. In other words, represents the expectations: * KUNIT_{EXPECT|ASSERT}_{TRUE|FALSE} */ struct kunit_unary_assert { struct kunit_assert assert; const char *condition; bool expected_true; }; void kunit_unary_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_UNARY_ASSERT_STRUCT() - Initializes &struct kunit_unary_assert. 
* @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @cond: A string representation of the expression asserted true or false. * @expect_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise. * * Initializes a &struct kunit_unary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_UNARY_ASSERT_STRUCT(test, type, cond, expect_true) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_unary_assert_format), \ .condition = cond, \ .expected_true = expect_true \ } /** * struct kunit_ptr_not_err_assert - An expectation/assertion that a pointer is * not NULL and not a -errno. * @assert: The parent of this type. * @text: A string representation of the expression passed to the expectation. * @value: The actual evaluated pointer value of the expression. * * Represents an expectation/assertion that a pointer is not null and does * not contain a -errno. (See IS_ERR_OR_NULL().) */ struct kunit_ptr_not_err_assert { struct kunit_assert assert; const char *text; const void *value; }; void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_PTR_NOT_ERR_STRUCT() - Initializes a * &struct kunit_ptr_not_err_assert. * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @txt: A string representation of the expression passed to the expectation. * @val: The actual evaluated pointer value of the expression. * * Initializes a &struct kunit_ptr_not_err_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ #define KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, type, txt, val) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_ptr_not_err_assert_format), \ .text = txt, \ .value = val \ } /** * struct kunit_binary_assert - An expectation/assertion that compares two * non-pointer values (for example, KUNIT_EXPECT_EQ(test, 1 + 1, 2)). * @assert: The parent of this type. * @operation: A string representation of the comparison operator (e.g. "=="). * @left_text: A string representation of the expression in the left slot. * @left_value: The actual evaluated value of the expression in the left slot. * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two non-pointer values. For * example, to expect that 1 + 1 == 2, you can use the expectation * KUNIT_EXPECT_EQ(test, 1 + 1, 2); */ struct kunit_binary_assert { struct kunit_assert assert; const char *operation; const char *left_text; long long left_value; const char *right_text; long long right_value; }; void kunit_binary_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_assert. * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. * @left_val: The actual evaluated value of the expression in the left slot. * @right_str: A string representation of the expression in the right slot. * @right_val: The actual evaluated value of the expression in the right slot. * * Initializes a &struct kunit_binary_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. 
*/ #define KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \ type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_binary_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ .right_text = right_str, \ .right_value = right_val \ } /** * struct kunit_binary_ptr_assert - An expectation/assertion that compares two * pointer values (for example, KUNIT_EXPECT_PTR_EQ(test, foo, bar)). * @assert: The parent of this type. * @operation: A string representation of the comparison operator (e.g. "=="). * @left_text: A string representation of the expression in the left slot. * @left_value: The actual evaluated value of the expression in the left slot. * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two pointer values. For * example, to expect that foo and bar point to the same thing, you can use the * expectation KUNIT_EXPECT_PTR_EQ(test, foo, bar); */ struct kunit_binary_ptr_assert { struct kunit_assert assert; const char *operation; const char *left_text; const void *left_value; const char *right_text; const void *right_value; }; void kunit_binary_ptr_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_ptr_assert. * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. * @left_val: The actual evaluated value of the expression in the left slot. * @right_str: A string representation of the expression in the right slot. 
* @right_val: The actual evaluated value of the expression in the right slot. * * Initializes a &struct kunit_binary_ptr_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(test, \ type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_binary_ptr_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ .right_text = right_str, \ .right_value = right_val \ } /** * struct kunit_binary_str_assert - An expectation/assertion that compares two * string values (for example, KUNIT_EXPECT_STREQ(test, foo, "bar")). * @assert: The parent of this type. * @operation: A string representation of the comparison operator (e.g. "=="). * @left_text: A string representation of the expression in the left slot. * @left_value: The actual evaluated value of the expression in the left slot. * @right_text: A string representation of the expression in the right slot. * @right_value: The actual evaluated value of the expression in the right slot. * * Represents an expectation/assertion that compares two string values. For * example, to expect that the string in foo is equal to "bar", you can use the * expectation KUNIT_EXPECT_STREQ(test, foo, "bar"); */ struct kunit_binary_str_assert { struct kunit_assert assert; const char *operation; const char *left_text; const char *left_value; const char *right_text; const char *right_value; }; void kunit_binary_str_assert_format(const struct kunit_assert *assert, struct string_stream *stream); /** * KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() - Initializes a * &struct kunit_binary_str_assert. * @test: The test case that this expectation/assertion is associated with. * @type: The type (assertion or expectation) of this kunit_assert. * @op_str: A string representation of the comparison operator (e.g. "=="). * @left_str: A string representation of the expression in the left slot. 
* @left_val: The actual evaluated value of the expression in the left slot. * @right_str: A string representation of the expression in the right slot. * @right_val: The actual evaluated value of the expression in the right slot. * * Initializes a &struct kunit_binary_str_assert. Intended to be used in * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros. */ #define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ type, \ op_str, \ left_str, \ left_val, \ right_str, \ right_val) { \ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \ type, \ kunit_binary_str_assert_format), \ .operation = op_str, \ .left_text = left_str, \ .left_value = left_val, \ .right_text = right_str, \ .right_value = right_val \ } #endif /* _KUNIT_ASSERT_H */ PK ! t��t t kunit/try-catch.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /* * An API to allow a function, that may fail, to be executed, and recover in a * controlled manner. * * Copyright (C) 2019, Google LLC. * Author: Brendan Higgins <brendanhiggins@google.com> */ #ifndef _KUNIT_TRY_CATCH_H #define _KUNIT_TRY_CATCH_H #include <linux/types.h> typedef void (*kunit_try_catch_func_t)(void *); struct completion; struct kunit; /** * struct kunit_try_catch - provides a generic way to run code which might fail. * @test: The test case that is currently being executed. * @try_completion: Completion that the control thread waits on while test runs. * @try_result: Contains any errno obtained while running test case. * @try: The function, the test case, to attempt to run. * @catch: The function called if @try bails out. * @context: used to pass user data to the try and catch functions. * * kunit_try_catch provides a generic, architecture independent way to execute * an arbitrary function of type kunit_try_catch_func_t which may bail out by * calling kunit_try_catch_throw(). If kunit_try_catch_throw() is called, @try * is stopped at the site of invocation and @catch is called. 
* * struct kunit_try_catch provides a generic interface for the functionality * needed to implement kunit->abort() which in turn is needed for implementing * assertions. Assertions allow stating a precondition for a test simplifying * how test cases are written and presented. * * Assertions are like expectations, except they abort (call * kunit_try_catch_throw()) when the specified condition is not met. This is * useful when you look at a test case as a logical statement about some piece * of code, where assertions are the premises for the test case, and the * conclusion is a set of predicates, rather expectations, that must all be * true. If your premises are violated, it does not makes sense to continue. */ struct kunit_try_catch { /* private: internal use only. */ struct kunit *test; struct completion *try_completion; int try_result; kunit_try_catch_func_t try; kunit_try_catch_func_t catch; void *context; }; void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context); void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch); static inline int kunit_try_catch_get_result(struct kunit_try_catch *try_catch) { return try_catch->try_result; } #endif /* _KUNIT_TRY_CATCH_H */ PK ! /�vG� � kunit/test-bug.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /* * KUnit API allowing dynamic analysis tools to interact with KUnit tests * * Copyright (C) 2020, Google LLC. * Author: Uriel Guajardo <urielguajardo@google.com> */ #ifndef _KUNIT_TEST_BUG_H #define _KUNIT_TEST_BUG_H #define kunit_fail_current_test(fmt, ...) \ __kunit_fail_current_test(__FILE__, __LINE__, fmt, ##__VA_ARGS__) #if IS_BUILTIN(CONFIG_KUNIT) extern __printf(3, 4) void __kunit_fail_current_test(const char *file, int line, const char *fmt, ...); #else static inline __printf(3, 4) void __kunit_fail_current_test(const char *file, int line, const char *fmt, ...) { } #endif #endif /* _KUNIT_TEST_BUG_H */ PK ! 
<��i<� <� kunit/test.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /* * Base unit test (KUnit) API. * * Copyright (C) 2019, Google LLC. * Author: Brendan Higgins <brendanhiggins@google.com> */ #ifndef _KUNIT_TEST_H #define _KUNIT_TEST_H #include <kunit/assert.h> #include <kunit/try-catch.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kref.h> struct kunit_resource; typedef int (*kunit_resource_init_t)(struct kunit_resource *, void *); typedef void (*kunit_resource_free_t)(struct kunit_resource *); /** * struct kunit_resource - represents a *test managed resource* * @data: for the user to store arbitrary data. * @name: optional name * @free: a user supplied function to free the resource. Populated by * kunit_resource_alloc(). * * Represents a *test managed resource*, a resource which will automatically be * cleaned up at the end of a test case. * * Resources are reference counted so if a resource is retrieved via * kunit_alloc_and_get_resource() or kunit_find_resource(), we need * to call kunit_put_resource() to reduce the resource reference count * when finished with it. Note that kunit_alloc_resource() does not require a * kunit_resource_put() because it does not retrieve the resource itself. * * Example: * * .. 
code-block:: c * * struct kunit_kmalloc_params { * size_t size; * gfp_t gfp; * }; * * static int kunit_kmalloc_init(struct kunit_resource *res, void *context) * { * struct kunit_kmalloc_params *params = context; * res->data = kmalloc(params->size, params->gfp); * * if (!res->data) * return -ENOMEM; * * return 0; * } * * static void kunit_kmalloc_free(struct kunit_resource *res) * { * kfree(res->data); * } * * void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) * { * struct kunit_kmalloc_params params; * * params.size = size; * params.gfp = gfp; * * return kunit_alloc_resource(test, kunit_kmalloc_init, * kunit_kmalloc_free, &params); * } * * Resources can also be named, with lookup/removal done on a name * basis also. kunit_add_named_resource(), kunit_find_named_resource() * and kunit_destroy_named_resource(). Resource names must be * unique within the test instance. */ struct kunit_resource { void *data; const char *name; kunit_resource_free_t free; /* private: internal use only. */ struct kref refcount; struct list_head node; }; struct kunit; /* Size of log associated with test. */ #define KUNIT_LOG_SIZE 512 /* Maximum size of parameter description string. */ #define KUNIT_PARAM_DESC_SIZE 128 /* Maximum size of a status comment. */ #define KUNIT_STATUS_COMMENT_SIZE 256 /* * TAP specifies subtest stream indentation of 4 spaces, 8 spaces for a * sub-subtest. See the "Subtests" section in * https://node-tap.org/tap-protocol/ * * The two literals below must therefore be exactly 4 and 8 spaces long. */ #define KUNIT_SUBTEST_INDENT "    " #define KUNIT_SUBSUBTEST_INDENT "        " /** * enum kunit_status - Type of result for a test or test suite * @KUNIT_SUCCESS: Denotes the test suite has not failed nor been skipped * @KUNIT_FAILURE: Denotes the test has failed. * @KUNIT_SKIPPED: Denotes the test has been skipped. */ enum kunit_status { KUNIT_SUCCESS, KUNIT_FAILURE, KUNIT_SKIPPED, }; /** * struct kunit_case - represents an individual test case. * * @run_case: the function representing the actual test case. * @name: the name of the test case. 
* @generate_params: the generator function for parameterized tests. * * A test case is a function with the signature, * ``void (*)(struct kunit *)`` * that makes expectations and assertions (see KUNIT_EXPECT_TRUE() and * KUNIT_ASSERT_TRUE()) about code under test. Each test case is associated * with a &struct kunit_suite and will be run after the suite's init * function and followed by the suite's exit function. * * A test case should be static and should only be created with the * KUNIT_CASE() macro; additionally, every array of test cases should be * terminated with an empty test case. * * Example: * * .. code-block:: c * * void add_test_basic(struct kunit *test) * { * KUNIT_EXPECT_EQ(test, 1, add(1, 0)); * KUNIT_EXPECT_EQ(test, 2, add(1, 1)); * KUNIT_EXPECT_EQ(test, 0, add(-1, 1)); * KUNIT_EXPECT_EQ(test, INT_MAX, add(0, INT_MAX)); * KUNIT_EXPECT_EQ(test, -1, add(INT_MAX, INT_MIN)); * } * * static struct kunit_case example_test_cases[] = { * KUNIT_CASE(add_test_basic), * {} * }; * */ struct kunit_case { void (*run_case)(struct kunit *test); const char *name; const void* (*generate_params)(const void *prev, char *desc); /* private: internal use only. */ enum kunit_status status; char *log; }; /* * Map a status to its result string: KUNIT_SKIPPED and KUNIT_SUCCESS both * give "ok", KUNIT_FAILURE gives "not ok", and any value outside the enum * falls through to "invalid". */ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status) { switch (status) { case KUNIT_SKIPPED: case KUNIT_SUCCESS: return "ok"; case KUNIT_FAILURE: return "not ok"; } return "invalid"; } /** * KUNIT_CASE - A helper for creating a &struct kunit_case * * @test_name: a reference to a test case function. * * Takes a symbol for a function representing a test case and creates a * &struct kunit_case object from it. See the documentation for * &struct kunit_case for an example on how to use it. */ #define KUNIT_CASE(test_name) { .run_case = test_name, .name = #test_name } /** * KUNIT_CASE_PARAM - A helper for creating a parameterized &struct kunit_case * * @test_name: a reference to a test case function. * @gen_params: a reference to a parameter generator function. 
* * The generator function:: * * const void* gen_params(const void *prev, char *desc) * * is used to lazily generate a series of arbitrarily typed values that fit into * a void*. The argument @prev is the previously returned value, which should be * used to derive the next value; @prev is set to NULL on the initial generator * call. When no more values are available, the generator must return NULL. * Optionally write a string into @desc (size of KUNIT_PARAM_DESC_SIZE) * describing the parameter. */ #define KUNIT_CASE_PARAM(test_name, gen_params) \ { .run_case = test_name, .name = #test_name, \ .generate_params = gen_params } /** * struct kunit_suite - describes a related collection of &struct kunit_case * * @name: the name of the test. Purely informational. * @init: called before every test case. * @exit: called after every test case. * @test_cases: a null terminated array of test cases. * * A kunit_suite is a collection of related &struct kunit_case s, such that * @init is called before every test case and @exit is called after every * test case, similar to the notion of a *test fixture* or a *test class* * in other unit testing frameworks like JUnit or Googletest. * * Every &struct kunit_case must be associated with a kunit_suite for KUnit * to run it. */ struct kunit_suite { const char name[256]; int (*init)(struct kunit *test); void (*exit)(struct kunit *test); struct kunit_case *test_cases; /* private: internal use only */ char status_comment[KUNIT_STATUS_COMMENT_SIZE]; struct dentry *debugfs; char *log; }; /** * struct kunit - represents a running instance of a test. * * @priv: for user to store arbitrary data. Commonly used to pass data * created in the init function (see &struct kunit_suite). * * Used to store information about the current context under which the test * is running. 
Most of this data is private and should only be accessed * indirectly via public functions; the one exception is @priv which can be * used by the test writer to store arbitrary data. */ struct kunit { void *priv; /* private: internal use only. */ const char *name; /* Read only after initialization! */ char *log; /* Points at case log after initialization */ struct kunit_try_catch try_catch; /* param_value is the current parameter value for a test case. */ const void *param_value; /* param_index stores the index of the parameter in parameterized tests. */ int param_index; /* * success starts as true, and may only be set to false during a * test case; thus, it is safe to update this across multiple * threads using WRITE_ONCE; however, as a consequence, it may only * be read after the test case finishes once all threads associated * with the test case have terminated. */ spinlock_t lock; /* Guards all mutable test state. */ enum kunit_status status; /* Read only after test_case finishes! */ /* * Because resources is a list that may be updated multiple times (with * new resources) from any thread associated with a test case, we must * protect it with some type of lock. */ struct list_head resources; /* Protected by lock. 
*/ char status_comment[KUNIT_STATUS_COMMENT_SIZE]; }; static inline void kunit_set_failure(struct kunit *test) { WRITE_ONCE(test->status, KUNIT_FAILURE); } void kunit_init_test(struct kunit *test, const char *name, char *log); int kunit_run_tests(struct kunit_suite *suite); size_t kunit_suite_num_test_cases(struct kunit_suite *suite); unsigned int kunit_test_case_num(struct kunit_suite *suite, struct kunit_case *test_case); int __kunit_test_suites_init(struct kunit_suite * const * const suites); void __kunit_test_suites_exit(struct kunit_suite **suites); #if IS_BUILTIN(CONFIG_KUNIT) int kunit_run_all_tests(void); #else static inline int kunit_run_all_tests(void) { return 0; } #endif /* IS_BUILTIN(CONFIG_KUNIT) */ #ifdef MODULE /** * kunit_test_suites_for_module() - used to register one or more * &struct kunit_suite with KUnit. * * @__suites: a statically allocated list of &struct kunit_suite. * * Registers @__suites with the test framework. See &struct kunit_suite for * more information. * * If a test suite is built-in, module_init() gets translated into * an initcall which we don't want as the idea is that for builtins * the executor will manage execution. So ensure we do not define * module_{init|exit} functions for the builtin case when registering * suites via kunit_test_suites() below. */ #define kunit_test_suites_for_module(__suites) \ static int __init kunit_test_suites_init(void) \ { \ return __kunit_test_suites_init(__suites); \ } \ module_init(kunit_test_suites_init); \ \ static void __exit kunit_test_suites_exit(void) \ { \ return __kunit_test_suites_exit(__suites); \ } \ module_exit(kunit_test_suites_exit) #else #define kunit_test_suites_for_module(__suites) #endif /* MODULE */ #define __kunit_test_suites(unique_array, unique_suites, ...) 
\ static struct kunit_suite *unique_array[] = { __VA_ARGS__, NULL }; \ kunit_test_suites_for_module(unique_array); \ static struct kunit_suite **unique_suites \ __used __section(".kunit_test_suites") = unique_array /** * kunit_test_suites() - used to register one or more &struct kunit_suite * with KUnit. * * @__suites: a statically allocated list of &struct kunit_suite. * * Registers @__suites with the test framework. See &struct kunit_suite for * more information. * * When builtin, KUnit tests are all run via executor; this is done * by placing the array of struct kunit_suite * in the .kunit_test_suites * ELF section. * * An alternative is to build the tests as a module. Because modules do not * support multiple initcall()s, we need to initialize an array of suites for a * module. * */ #define kunit_test_suites(__suites...) \ __kunit_test_suites(__UNIQUE_ID(array), \ __UNIQUE_ID(suites), \ ##__suites) #define kunit_test_suite(suite) kunit_test_suites(&suite) #define kunit_suite_for_each_test_case(suite, test_case) \ for (test_case = suite->test_cases; test_case->run_case; test_case++) enum kunit_status kunit_suite_has_succeeded(struct kunit_suite *suite); /* * Like kunit_alloc_resource() below, but returns the struct kunit_resource * object that contains the allocation. This is mostly for testing purposes. */ struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test, kunit_resource_init_t init, kunit_resource_free_t free, gfp_t internal_gfp, void *context); /** * kunit_get_resource() - Hold resource for use. Should not need to be used * by most users as we automatically get resources * retrieved by kunit_find_resource*(). * @res: resource */ static inline void kunit_get_resource(struct kunit_resource *res) { kref_get(&res->refcount); } /* * Called when refcount reaches zero via kunit_put_resource(); * should not be called directly.
*/ static inline void kunit_release_resource(struct kref *kref) { struct kunit_resource *res = container_of(kref, struct kunit_resource, refcount); /* If free function is defined, resource was dynamically allocated. */ if (res->free) { res->free(res); kfree(res); } } /** * kunit_put_resource() - When caller is done with retrieved resource, * kunit_put_resource() should be called to drop * reference count. The resource list maintains * a reference count on resources, so if no users * are utilizing a resource and it is removed from * the resource list, it will be freed via the * associated free function (if any). Only * needs to be used if we alloc_and_get() or * find() resource. * @res: resource */ static inline void kunit_put_resource(struct kunit_resource *res) { kref_put(&res->refcount, kunit_release_resource); } /** * kunit_add_resource() - Add a *test managed resource*. * @test: The test context object. * @init: a user-supplied function to initialize the result (if needed). If * none is supplied, the resource data value is simply set to @data. * If an init function is supplied, @data is passed to it instead. * @free: a user-supplied function to free the resource (if needed). * @res: The resource. * @data: value to pass to init function or set in resource data field. */ int kunit_add_resource(struct kunit *test, kunit_resource_init_t init, kunit_resource_free_t free, struct kunit_resource *res, void *data); /** * kunit_add_named_resource() - Add a named *test managed resource*. * @test: The test context object. * @init: a user-supplied function to initialize the resource data, if needed. * @free: a user-supplied function to free the resource data, if needed. * @res: The resource. * @name: name to be set for resource. * @data: value to pass to init function or set in resource data field. 
*/ int kunit_add_named_resource(struct kunit *test, kunit_resource_init_t init, kunit_resource_free_t free, struct kunit_resource *res, const char *name, void *data); /** * kunit_alloc_resource() - Allocates a *test managed resource*. * @test: The test context object. * @init: a user supplied function to initialize the resource. * @free: a user supplied function to free the resource. * @internal_gfp: gfp to use for internal allocations, if unsure, use GFP_KERNEL * @context: for the user to pass in arbitrary data to the init function. * * Allocates a *test managed resource*, a resource which will automatically be * cleaned up at the end of a test case. See &struct kunit_resource for an * example. * * Note: KUnit needs to allocate memory for a kunit_resource object. You must * specify an @internal_gfp that is compatible with the use context of your * resource. */ static inline void *kunit_alloc_resource(struct kunit *test, kunit_resource_init_t init, kunit_resource_free_t free, gfp_t internal_gfp, void *context) { struct kunit_resource *res; res = kzalloc(sizeof(*res), internal_gfp); if (!res) return NULL; if (!kunit_add_resource(test, init, free, res, context)) return res->data; return NULL; } typedef bool (*kunit_resource_match_t)(struct kunit *test, struct kunit_resource *res, void *match_data); /** * kunit_resource_instance_match() - Match a resource with the same instance. * @test: Test case to which the resource belongs. * @res: The resource. * @match_data: The resource pointer to match against. * * An instance of kunit_resource_match_t that matches a resource whose * allocation matches @match_data. */ static inline bool kunit_resource_instance_match(struct kunit *test, struct kunit_resource *res, void *match_data) { return res->data == match_data; } /** * kunit_resource_name_match() - Match a resource with the same name. * @test: Test case to which the resource belongs. * @res: The resource. * @match_name: The name to match against. 
*/ static inline bool kunit_resource_name_match(struct kunit *test, struct kunit_resource *res, void *match_name) { return res->name && strcmp(res->name, match_name) == 0; } /** * kunit_find_resource() - Find a resource using match function/data. * @test: Test case to which the resource belongs. * @match: match function to be applied to resources/match data. * @match_data: data to be used in matching. */ static inline struct kunit_resource * kunit_find_resource(struct kunit *test, kunit_resource_match_t match, void *match_data) { struct kunit_resource *res, *found = NULL; unsigned long flags; spin_lock_irqsave(&test->lock, flags); list_for_each_entry_reverse(res, &test->resources, node) { if (match(test, res, (void *)match_data)) { found = res; kunit_get_resource(found); break; } } spin_unlock_irqrestore(&test->lock, flags); return found; } /** * kunit_find_named_resource() - Find a resource using match name. * @test: Test case to which the resource belongs. * @name: match name. */ static inline struct kunit_resource * kunit_find_named_resource(struct kunit *test, const char *name) { return kunit_find_resource(test, kunit_resource_name_match, (void *)name); } /** * kunit_destroy_resource() - Find a kunit_resource and destroy it. * @test: Test case to which the resource belongs. * @match: Match function. Returns whether a given resource matches @match_data. * @match_data: Data passed into @match. * * RETURNS: * 0 if kunit_resource is found and freed, -ENOENT if not found. */ int kunit_destroy_resource(struct kunit *test, kunit_resource_match_t match, void *match_data); static inline int kunit_destroy_named_resource(struct kunit *test, const char *name) { return kunit_destroy_resource(test, kunit_resource_name_match, (void *)name); } /** * kunit_remove_resource() - remove resource from resource list associated with * test. * @test: The test context object. * @res: The resource to be removed. 
* * Note that the resource will not be immediately freed since it is likely * the caller has a reference to it via alloc_and_get() or find(); * in this case a final call to kunit_put_resource() is required. */ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res); /** * kunit_kmalloc_array() - Like kmalloc_array() except the allocation is *test managed*. * @test: The test context object. * @n: number of elements. * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * * Just like `kmalloc_array(...)`, except the allocation is managed by the test case * and is automatically cleaned up after the test case concludes. See &struct * kunit_resource for more information. */ void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp); /** * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. * @test: The test context object. * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * * See kmalloc() and kunit_kmalloc_array() for more information. */ static inline void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) { return kunit_kmalloc_array(test, 1, size, gfp); } /** * kunit_kfree() - Like kfree except for allocations managed by KUnit. * @test: The test case to which the resource belongs. * @ptr: The memory allocation to free. */ void kunit_kfree(struct kunit *test, const void *ptr); /** * kunit_kzalloc() - Just like kunit_kmalloc(), but zeroes the allocation. * @test: The test context object. * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * * See kzalloc() and kunit_kmalloc_array() for more information. */ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp) { return kunit_kmalloc(test, size, gfp | __GFP_ZERO); } /** * kunit_kcalloc() - Just like kunit_kmalloc_array(), but zeroes the allocation. * @test: The test context object. 
* @n: number of elements. * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * * See kcalloc() and kunit_kmalloc_array() for more information. */ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp) { return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } void kunit_cleanup(struct kunit *test); void __printf(2, 3) kunit_log_append(char *log, const char *fmt, ...); /** * kunit_mark_skipped() - Marks @test_or_suite as skipped * * @test_or_suite: The test context object. * @fmt: A printk() style format string. * * Marks the test as skipped. @fmt is given output as the test status * comment, typically the reason the test was skipped. * * Test execution continues after kunit_mark_skipped() is called. */ #define kunit_mark_skipped(test_or_suite, fmt, ...) \ do { \ WRITE_ONCE((test_or_suite)->status, KUNIT_SKIPPED); \ scnprintf((test_or_suite)->status_comment, \ KUNIT_STATUS_COMMENT_SIZE, \ fmt, ##__VA_ARGS__); \ } while (0) /** * kunit_skip() - Marks @test_or_suite as skipped * * @test_or_suite: The test context object. * @fmt: A printk() style format string. * * Skips the test. @fmt is given output as the test status * comment, typically the reason the test was skipped. * * Test execution is halted after kunit_skip() is called. */ #define kunit_skip(test_or_suite, fmt, ...) \ do { \ kunit_mark_skipped((test_or_suite), fmt, ##__VA_ARGS__);\ kunit_try_catch_throw(&((test_or_suite)->try_catch)); \ } while (0) /* * printk and log to per-test or per-suite log buffer. Logging only done * if CONFIG_KUNIT_DEBUGFS is 'y'; if it is 'n', no log is allocated/used. */ #define kunit_log(lvl, test_or_suite, fmt, ...) \ do { \ printk(lvl fmt, ##__VA_ARGS__); \ kunit_log_append((test_or_suite)->log, fmt "\n", \ ##__VA_ARGS__); \ } while (0) #define kunit_printk(lvl, test, fmt, ...) 
\ kunit_log(lvl, test, KUNIT_SUBTEST_INDENT "# %s: " fmt, \ (test)->name, ##__VA_ARGS__) /** * kunit_info() - Prints an INFO level message associated with @test. * * @test: The test context object. * @fmt: A printk() style format string. * * Prints an info level message associated with the test suite being run. * Takes a variable number of format parameters just like printk(). */ #define kunit_info(test, fmt, ...) \ kunit_printk(KERN_INFO, test, fmt, ##__VA_ARGS__) /** * kunit_warn() - Prints a WARN level message associated with @test. * * @test: The test context object. * @fmt: A printk() style format string. * * Prints a warning level message. */ #define kunit_warn(test, fmt, ...) \ kunit_printk(KERN_WARNING, test, fmt, ##__VA_ARGS__) /** * kunit_err() - Prints an ERROR level message associated with @test. * * @test: The test context object. * @fmt: A printk() style format string. * * Prints an error level message. */ #define kunit_err(test, fmt, ...) \ kunit_printk(KERN_ERR, test, fmt, ##__VA_ARGS__) /** * KUNIT_SUCCEED() - A no-op expectation. Only exists for code clarity. * @test: The test context object. * * The opposite of KUNIT_FAIL(), it is an expectation that cannot fail. In other * words, it does nothing and only exists for code clarity. See * KUNIT_EXPECT_TRUE() for more information. */ #define KUNIT_SUCCEED(test) do {} while (0) void kunit_do_assertion(struct kunit *test, struct kunit_assert *assert, bool pass, const char *fmt, ...); #define KUNIT_ASSERTION(test, pass, assert_class, INITIALIZER, fmt, ...) do { \ struct assert_class __assertion = INITIALIZER; \ kunit_do_assertion(test, \ &__assertion.assert, \ pass, \ fmt, \ ##__VA_ARGS__); \ } while (0) #define KUNIT_FAIL_ASSERTION(test, assert_type, fmt, ...) \ KUNIT_ASSERTION(test, \ false, \ kunit_fail_assert, \ KUNIT_INIT_FAIL_ASSERT_STRUCT(test, assert_type), \ fmt, \ ##__VA_ARGS__) /** * KUNIT_FAIL() - Always causes a test to fail when evaluated. * @test: The test context object. 
* @fmt: an informational message to be printed when the assertion is made. * @...: string format arguments. * * The opposite of KUNIT_SUCCEED(), it is an expectation that always fails. In * other words, it always results in a failed expectation, and consequently * always causes the test case to fail when evaluated. See KUNIT_EXPECT_TRUE() * for more information. */ #define KUNIT_FAIL(test, fmt, ...) \ KUNIT_FAIL_ASSERTION(test, \ KUNIT_EXPECTATION, \ fmt, \ ##__VA_ARGS__) #define KUNIT_UNARY_ASSERTION(test, \ assert_type, \ condition, \ expected_true, \ fmt, \ ...) \ KUNIT_ASSERTION(test, \ !!(condition) == !!expected_true, \ kunit_unary_assert, \ KUNIT_INIT_UNARY_ASSERT_STRUCT(test, \ assert_type, \ #condition, \ expected_true), \ fmt, \ ##__VA_ARGS__) #define KUNIT_TRUE_MSG_ASSERTION(test, assert_type, condition, fmt, ...) \ KUNIT_UNARY_ASSERTION(test, \ assert_type, \ condition, \ true, \ fmt, \ ##__VA_ARGS__) #define KUNIT_TRUE_ASSERTION(test, assert_type, condition) \ KUNIT_TRUE_MSG_ASSERTION(test, assert_type, condition, NULL) #define KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, fmt, ...) \ KUNIT_UNARY_ASSERTION(test, \ assert_type, \ condition, \ false, \ fmt, \ ##__VA_ARGS__) #define KUNIT_FALSE_ASSERTION(test, assert_type, condition) \ KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, NULL) /* * A factory macro for defining the assertions and expectations for the basic * comparisons defined for the built in types. * * Unfortunately, there is no common type that all types can be promoted to for * which all the binary operators behave the same way as for the actual types * (for example, there is no type that long long and unsigned long long can * both be cast to where the comparison result is preserved for all values). 
So * the best we can do is do the comparison in the original types and then coerce * everything to long long for printing; this way, the comparison behaves * correctly and the printed out value usually makes sense without * interpretation, but can always be interpreted to figure out the actual * value. */ #define KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ op, \ right, \ fmt, \ ...) \ do { \ typeof(left) __left = (left); \ typeof(right) __right = (right); \ \ KUNIT_ASSERTION(test, \ __left op __right, \ assert_class, \ ASSERT_CLASS_INIT(test, \ assert_type, \ #op, \ #left, \ __left, \ #right, \ __right), \ fmt, \ ##__VA_ARGS__); \ } while (0) #define KUNIT_BASE_EQ_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, ==, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BASE_NE_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, !=, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BASE_LT_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, <, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BASE_LE_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, <=, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BASE_GT_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) 
\ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, >, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BASE_GE_MSG_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_BINARY_ASSERTION(test, \ assert_class, \ ASSERT_CLASS_INIT, \ assert_type, \ left, >=, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_EQ_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_EQ_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_EQ_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_EQ_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_EQ_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_NE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_NE_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_NE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) 
\ KUNIT_BASE_NE_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_NE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_LT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_LT_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_LT_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_LT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_LT_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_LT_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_LE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_LE_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_LE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_LE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) 
\ KUNIT_BASE_LE_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_LE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_GT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GT_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_GT_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_GT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_GT_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_GT_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_GE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\ KUNIT_BASE_GE_MSG_ASSERTION(test, \ kunit_binary_assert, \ KUNIT_INIT_BINARY_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_GE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_GE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BASE_GE_MSG_ASSERTION(test, \ kunit_binary_ptr_assert, \ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \ assert_type, \ left, \ right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_PTR_GE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, \ op, \ right, \ fmt, \ ...) 
\ do { \ const char *__left = (left); \ const char *__right = (right); \ \ KUNIT_ASSERTION(test, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ assert_type, \ #op, \ #left, \ __left, \ #right, \ __right), \ fmt, \ ##__VA_ARGS__); \ } while (0) #define KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, ==, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_STR_EQ_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ fmt, \ ...) \ KUNIT_BINARY_STR_ASSERTION(test, \ assert_type, \ left, !=, right, \ fmt, \ ##__VA_ARGS__) #define KUNIT_BINARY_STR_NE_ASSERTION(test, assert_type, left, right) \ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ assert_type, \ left, \ right, \ NULL) #define KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ assert_type, \ ptr, \ fmt, \ ...) \ do { \ typeof(ptr) __ptr = (ptr); \ \ KUNIT_ASSERTION(test, \ !IS_ERR_OR_NULL(__ptr), \ kunit_ptr_not_err_assert, \ KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, \ assert_type, \ #ptr, \ __ptr), \ fmt, \ ##__VA_ARGS__); \ } while (0) #define KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, assert_type, ptr) \ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ assert_type, \ ptr, \ NULL) /** * KUNIT_EXPECT_TRUE() - Causes a test failure when the expression is not true. * @test: The test context object. * @condition: an arbitrary boolean expression. The test fails when this does * not evaluate to true. * * This and expectations of the form `KUNIT_EXPECT_*` will cause the test case * to fail when the specified condition is not met; however, it will not prevent * the test case from continuing to run; this is otherwise known as an * *expectation failure*. 
*/ #define KUNIT_EXPECT_TRUE(test, condition) \ KUNIT_TRUE_ASSERTION(test, KUNIT_EXPECTATION, condition) #define KUNIT_EXPECT_TRUE_MSG(test, condition, fmt, ...) \ KUNIT_TRUE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ condition, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_FALSE() - Makes a test failure when the expression is not false. * @test: The test context object. * @condition: an arbitrary boolean expression. The test fails when this does * not evaluate to false. * * Sets an expectation that @condition evaluates to false. See * KUNIT_EXPECT_TRUE() for more information. */ #define KUNIT_EXPECT_FALSE(test, condition) \ KUNIT_FALSE_ASSERTION(test, KUNIT_EXPECTATION, condition) #define KUNIT_EXPECT_FALSE_MSG(test, condition, fmt, ...) \ KUNIT_FALSE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ condition, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_EQ() - Sets an expectation that @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the values that @left and @right evaluate to are * equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) == (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_EQ(test, left, right) \ KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_PTR_EQ() - Expects that pointers @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a pointer. * @right: an arbitrary expression that evaluates to a pointer. * * Sets an expectation that the values that @left and @right evaluate to are * equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) == (@right)). 
See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_PTR_EQ(test, left, right) \ KUNIT_BINARY_PTR_EQ_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right) #define KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_NE() - An expectation that @left and @right are not equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the values that @left and @right evaluate to are not * equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) != (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_NE(test, left, right) \ KUNIT_BINARY_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_NE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_PTR_NE() - Expects that pointers @left and @right are not equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a pointer. * @right: an arbitrary expression that evaluates to a pointer. * * Sets an expectation that the values that @left and @right evaluate to are not * equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) != (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_PTR_NE(test, left, right) \ KUNIT_BINARY_PTR_NE_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right) #define KUNIT_EXPECT_PTR_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_LT() - An expectation that @left is less than @right. * @test: The test context object. 
* @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the value that @left evaluates to is less than the * value that @right evaluates to. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) < (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_LT(test, left, right) \ KUNIT_BINARY_LT_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_LT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LT_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_LE() - Expects that @left is less than or equal to @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the value that @left evaluates to is less than or * equal to the value that @right evaluates to. Semantically this is equivalent * to KUNIT_EXPECT_TRUE(@test, (@left) <= (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_LE(test, left, right) \ KUNIT_BINARY_LE_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_LE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_GT() - An expectation that @left is greater than @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the value that @left evaluates to is greater than * the value that @right evaluates to. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) > (@right)). See KUNIT_EXPECT_TRUE() for * more information. 
*/ #define KUNIT_EXPECT_GT(test, left, right) \ KUNIT_BINARY_GT_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_GT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GT_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_GE() - Expects that @left is greater than or equal to @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an expectation that the value that @left evaluates to is greater than * the value that @right evaluates to. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, (@left) >= (@right)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_GE(test, left, right) \ KUNIT_BINARY_GE_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_GE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_STREQ() - Expects that strings @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a null terminated string. * @right: an arbitrary expression that evaluates to a null terminated string. * * Sets an expectation that the values that @left and @right evaluate to are * equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, !strcmp((@left), (@right))). See KUNIT_EXPECT_TRUE() * for more information. */ #define KUNIT_EXPECT_STREQ(test, left, right) \ KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_STREQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_STRNEQ() - Expects that strings @left and @right are not equal. * @test: The test context object. 
* @left: an arbitrary expression that evaluates to a null terminated string. * @right: an arbitrary expression that evaluates to a null terminated string. * * Sets an expectation that the values that @left and @right evaluate to are * not equal. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_EXPECT_TRUE() * for more information. */ #define KUNIT_EXPECT_STRNEQ(test, left, right) \ KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right) #define KUNIT_EXPECT_STRNEQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_EXPECT_NOT_ERR_OR_NULL() - Expects that @ptr is not null and not err. * @test: The test context object. * @ptr: an arbitrary pointer. * * Sets an expectation that the value that @ptr evaluates to is not null and not * an errno stored in a pointer. This is semantically equivalent to * KUNIT_EXPECT_TRUE(@test, !IS_ERR_OR_NULL(@ptr)). See KUNIT_EXPECT_TRUE() for * more information. */ #define KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ptr) \ KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_EXPECTATION, ptr) #define KUNIT_EXPECT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ KUNIT_EXPECTATION, \ ptr, \ fmt, \ ##__VA_ARGS__) #define KUNIT_ASSERT_FAILURE(test, fmt, ...) \ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION, fmt, ##__VA_ARGS__) /** * KUNIT_ASSERT_TRUE() - Sets an assertion that @condition is true. * @test: The test context object. * @condition: an arbitrary boolean expression. The test fails and aborts when * this does not evaluate to true. * * This and assertions of the form `KUNIT_ASSERT_*` will cause the test case to * fail *and immediately abort* when the specified condition is not met. Unlike * an expectation failure, it will prevent the test case from continuing to run; * this is otherwise known as an *assertion failure*. 
*/ #define KUNIT_ASSERT_TRUE(test, condition) \ KUNIT_TRUE_ASSERTION(test, KUNIT_ASSERTION, condition) #define KUNIT_ASSERT_TRUE_MSG(test, condition, fmt, ...) \ KUNIT_TRUE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ condition, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_FALSE() - Sets an assertion that @condition is false. * @test: The test context object. * @condition: an arbitrary boolean expression. * * Sets an assertion that the value that @condition evaluates to is false. This * is the same as KUNIT_EXPECT_FALSE(), except it causes an assertion failure * (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_FALSE(test, condition) \ KUNIT_FALSE_ASSERTION(test, KUNIT_ASSERTION, condition) #define KUNIT_ASSERT_FALSE_MSG(test, condition, fmt, ...) \ KUNIT_FALSE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ condition, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_EQ() - Sets an assertion that @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an assertion that the values that @left and @right evaluate to are * equal. This is the same as KUNIT_EXPECT_EQ(), except it causes an assertion * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_EQ(test, left, right) \ KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_PTR_EQ() - Asserts that pointers @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a pointer. * @right: an arbitrary expression that evaluates to a pointer. * * Sets an assertion that the values that @left and @right evaluate to are * equal. 
This is the same as KUNIT_EXPECT_EQ(), except it causes an assertion * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_PTR_EQ(test, left, right) \ KUNIT_BINARY_PTR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_NE() - An assertion that @left and @right are not equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an assertion that the values that @left and @right evaluate to are not * equal. This is the same as KUNIT_EXPECT_NE(), except it causes an assertion * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_NE(test, left, right) \ KUNIT_BINARY_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_NE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_NE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_PTR_NE() - Asserts that pointers @left and @right are not equal. * KUNIT_ASSERT_PTR_EQ() - Asserts that pointers @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a pointer. * @right: an arbitrary expression that evaluates to a pointer. * * Sets an assertion that the values that @left and @right evaluate to are not * equal. This is the same as KUNIT_EXPECT_NE(), except it causes an assertion * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_PTR_NE(test, left, right) \ KUNIT_BINARY_PTR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_PTR_NE_MSG(test, left, right, fmt, ...) 
\ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_LT() - An assertion that @left is less than @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an assertion that the value that @left evaluates to is less than the * value that @right evaluates to. This is the same as KUNIT_EXPECT_LT(), except * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion * is not met. */ #define KUNIT_ASSERT_LT(test, left, right) \ KUNIT_BINARY_LT_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_LT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LT_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_LE() - An assertion that @left is less than or equal to @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an assertion that the value that @left evaluates to is less than or * equal to the value that @right evaluates to. This is the same as * KUNIT_EXPECT_LE(), except it causes an assertion failure (see * KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_LE(test, left, right) \ KUNIT_BINARY_LE_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_LE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_LE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_GT() - An assertion that @left is greater than @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. 
* * Sets an assertion that the value that @left evaluates to is greater than the * value that @right evaluates to. This is the same as KUNIT_EXPECT_GT(), except * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion * is not met. */ #define KUNIT_ASSERT_GT(test, left, right) \ KUNIT_BINARY_GT_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_GT_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GT_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_GE() - Assertion that @left is greater than or equal to @right. * @test: The test context object. * @left: an arbitrary expression that evaluates to a primitive C type. * @right: an arbitrary expression that evaluates to a primitive C type. * * Sets an assertion that the value that @left evaluates to is greater than the * value that @right evaluates to. This is the same as KUNIT_EXPECT_GE(), except * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion * is not met. */ #define KUNIT_ASSERT_GE(test, left, right) \ KUNIT_BINARY_GE_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_GE_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_GE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_STREQ() - An assertion that strings @left and @right are equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a null terminated string. * @right: an arbitrary expression that evaluates to a null terminated string. * * Sets an assertion that the values that @left and @right evaluate to are * equal. This is the same as KUNIT_EXPECT_STREQ(), except it causes an * assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_STREQ(test, left, right) \ KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_STREQ_MSG(test, left, right, fmt, ...) 
\ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_STRNEQ() - Expects that strings @left and @right are not equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a null terminated string. * @right: an arbitrary expression that evaluates to a null terminated string. * * Sets an expectation that the values that @left and @right evaluate to are * not equal. This is semantically equivalent to * KUNIT_ASSERT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_ASSERT_TRUE() * for more information. */ #define KUNIT_ASSERT_STRNEQ(test, left, right) \ KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right) #define KUNIT_ASSERT_STRNEQ_MSG(test, left, right, fmt, ...) \ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ left, \ right, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ASSERT_NOT_ERR_OR_NULL() - Assertion that @ptr is not null and not err. * @test: The test context object. * @ptr: an arbitrary pointer. * * Sets an assertion that the value that @ptr evaluates to is not null and not * an errno stored in a pointer. This is the same as * KUNIT_EXPECT_NOT_ERR_OR_NULL(), except it causes an assertion failure (see * KUNIT_ASSERT_TRUE()) when the assertion is not met. */ #define KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr) \ KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_ASSERTION, ptr) #define KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \ KUNIT_ASSERTION, \ ptr, \ fmt, \ ##__VA_ARGS__) /** * KUNIT_ARRAY_PARAM() - Define test parameter generator from an array. * @name: prefix for the test parameter generator function. * @array: array of test parameters. * @get_desc: function to convert param to description; NULL to use default * * Define function @name_gen_params which uses @array to generate parameters. 
*/ #define KUNIT_ARRAY_PARAM(name, array, get_desc) \ static const void *name##_gen_params(const void *prev, char *desc) \ { \ typeof((array)[0]) *__next = prev ? ((typeof(__next)) prev) + 1 : (array); \ if (__next - (array) < ARRAY_SIZE((array))) { \ void (*__get_desc)(typeof(__next), char *) = get_desc; \ if (__get_desc) \ __get_desc(__next, desc); \ return __next; \ } \ return NULL; \ } #endif /* _KUNIT_TEST_H */ PK ! #7�+B B xen/xen-front-pgdir-shbuf.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* * Xen frontend/backend page directory based shared buffer * helper module. * * Copyright (C) 2018 EPAM Systems Inc. * * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> */ #ifndef __XEN_FRONT_PGDIR_SHBUF_H_ #define __XEN_FRONT_PGDIR_SHBUF_H_ #include <linux/kernel.h> #include <xen/grant_table.h> struct xen_front_pgdir_shbuf_ops; struct xen_front_pgdir_shbuf { /* * Number of references granted for the backend use: * * - for frontend allocated/imported buffers this holds the number * of grant references for the page directory and the pages * of the buffer * * - for the buffer provided by the backend this only holds the number * of grant references for the page directory itself as grant * references for the buffer will be provided by the backend. */ int num_grefs; grant_ref_t *grefs; /* Page directory backing storage. */ u8 *directory; /* * Number of pages for the shared buffer itself (excluding the page * directory). */ int num_pages; /* * Backing storage of the shared buffer: these are the pages being * shared. */ struct page **pages; struct xenbus_device *xb_dev; /* These are the ops used internally depending on be_alloc mode. */ const struct xen_front_pgdir_shbuf_ops *ops; /* Xen map handles for the buffer allocated by the backend. */ grant_handle_t *backend_map_handles; }; struct xen_front_pgdir_shbuf_cfg { struct xenbus_device *xb_dev; /* Number of pages of the buffer backing storage. 
*/ int num_pages; /* Pages of the buffer to be shared. */ struct page **pages; /* * This is allocated outside because there are use-cases when * the buffer structure is allocated as a part of a bigger one. */ struct xen_front_pgdir_shbuf *pgdir; /* * Mode of grant reference sharing: if set then backend will share * grant references to the buffer with the frontend. */ int be_alloc; }; int xen_front_pgdir_shbuf_alloc(struct xen_front_pgdir_shbuf_cfg *cfg); grant_ref_t xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf); int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf); int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf); void xen_front_pgdir_shbuf_free(struct xen_front_pgdir_shbuf *buf); #endif /* __XEN_FRONT_PGDIR_SHBUF_H_ */ PK ! �닺2 2 xen/interface/version.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * version.h * * Xen version, type, and compile information. * * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com> * Copyright (c) 2005, Keir Fraser <keir@xensource.com> */ #ifndef __XEN_PUBLIC_VERSION_H__ #define __XEN_PUBLIC_VERSION_H__ /* NB. All ops return zero on success, except XENVER_version. */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 struct xen_extraversion { char extraversion[16]; }; #define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion)) /* arg == xen_compile_info_t. 
*/ #define XENVER_compile_info 2 struct xen_compile_info { char compiler[64]; char compile_by[16]; char compile_domain[32]; char compile_date[32]; }; #define XENVER_capabilities 3 struct xen_capabilities_info { char info[1024]; }; #define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info)) #define XENVER_changeset 4 struct xen_changeset_info { char info[64]; }; #define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info)) #define XENVER_platform_parameters 5 struct xen_platform_parameters { xen_ulong_t virt_start; }; #define XENVER_get_features 6 struct xen_feature_info { unsigned int submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ }; /* Declares the features reported by XENVER_get_features. */ #include <xen/interface/features.h> /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 #define XENVER_commandline 9 struct xen_commandline { char buf[1024]; }; /* * Return value is the number of bytes written, or XEN_Exx on error. * Calling with empty parameter returns the size of build_id. */ #define XENVER_build_id 10 struct xen_build_id { uint32_t len; /* IN: size of buf[]. */ unsigned char buf[]; }; #endif /* __XEN_PUBLIC_VERSION_H__ */ PK ! 
{J,� � xen/interface/xenpmu.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __XEN_PUBLIC_XENPMU_H__ #define __XEN_PUBLIC_XENPMU_H__ #include "xen.h" #define XENPMU_VER_MAJ 0 #define XENPMU_VER_MIN 1 /* * ` enum neg_errnoval * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args); * * @cmd == XENPMU_* (PMU operation) * @args == struct xenpmu_params */ /* ` enum xenpmu_op { */ #define XENPMU_mode_get 0 /* Also used for getting PMU version */ #define XENPMU_mode_set 1 #define XENPMU_feature_get 2 #define XENPMU_feature_set 3 #define XENPMU_init 4 #define XENPMU_finish 5 #define XENPMU_lvtpc_set 6 #define XENPMU_flush 7 /* ` } */ /* Parameters structure for HYPERVISOR_xenpmu_op call */ struct xen_pmu_params { /* IN/OUT parameters */ struct { uint32_t maj; uint32_t min; } version; uint64_t val; /* IN parameters */ uint32_t vcpu; uint32_t pad; }; /* PMU modes: * - XENPMU_MODE_OFF: No PMU virtualization * - XENPMU_MODE_SELF: Guests can profile themselves * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles * itself and Xen * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles * everyone: itself, the hypervisor and the guests. */ #define XENPMU_MODE_OFF 0 #define XENPMU_MODE_SELF (1<<0) #define XENPMU_MODE_HV (1<<1) #define XENPMU_MODE_ALL (1<<2) /* * PMU features: * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD) */ #define XENPMU_FEATURE_INTEL_BTS 1 /* * Shared PMU data between hypervisor and PV(H) domains. * * The hypervisor fills out this structure during PMU interrupt and sends an * interrupt to appropriate VCPU. * Architecture-independent fields of xen_pmu_data are WO for the hypervisor * and RO for the guest but some fields in xen_pmu_arch can be writable * by both the hypervisor and the guest (see arch-$arch/pmu.h). */ struct xen_pmu_data { /* Interrupted VCPU */ uint32_t vcpu_id; /* * Physical processor on which the interrupt occurred. 
On non-privileged * guests set to vcpu_id; */ uint32_t pcpu_id; /* * Domain that was interrupted. On non-privileged guests set to * DOMID_SELF. * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in * XENPMU_MODE_ALL mode, domain ID of another domain. */ domid_t domain_id; uint8_t pad[6]; /* Architecture-specific information */ struct xen_pmu_arch pmu; }; #endif /* __XEN_PUBLIC_XENPMU_H__ */ PK ! 4>� xen/interface/elfnote.hnu �[��� /****************************************************************************** * elfnote.h * * Definitions used for the Xen ELF notes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2006, Ian Campbell, XenSource Ltd. */ #ifndef __XEN_PUBLIC_ELFNOTE_H__ #define __XEN_PUBLIC_ELFNOTE_H__ /* * The notes should live in a SHT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of * the desc field. 
* * LEGACY indicated the fields in the legacy __xen_guest string which * this a note type replaces. * * String values (for non-legacy) are NULL terminated ASCII, also known * as ASCIZ type. */ /* * NAME=VALUE pair (string). */ #define XEN_ELFNOTE_INFO 0 /* * The virtual address of the entry point (numeric). * * LEGACY: VIRT_ENTRY */ #define XEN_ELFNOTE_ENTRY 1 /* The virtual address of the hypercall transfer page (numeric). * * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page * number not a virtual address) */ #define XEN_ELFNOTE_HYPERCALL_PAGE 2 /* The virtual address where the kernel image should be mapped (numeric). * * Defaults to 0. * * LEGACY: VIRT_BASE */ #define XEN_ELFNOTE_VIRT_BASE 3 /* * The offset of the ELF paddr field from the acutal required * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels * which wrote __PAGE_OFFSET into that field. This field defaults to 0 * if not present. * * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) */ #define XEN_ELFNOTE_PADDR_OFFSET 4 /* * The version of Xen that we work with (string). * * LEGACY: XEN_VER */ #define XEN_ELFNOTE_XEN_VERSION 5 /* * The name of the guest operating system (string). * * LEGACY: GUEST_OS */ #define XEN_ELFNOTE_GUEST_OS 6 /* * The version of the guest operating system (string). * * LEGACY: GUEST_VER */ #define XEN_ELFNOTE_GUEST_VERSION 7 /* * The loader type (string). * * LEGACY: LOADER */ #define XEN_ELFNOTE_LOADER 8 /* * The kernel supports PAE (x86/32 only, string = "yes" or "no"). * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be * placed above 4G. It is assumed that any kernel new enough to use * these ELF notes will include this and therefore "yes" here is * equivalent to "yes[entended-cr3]" in the __xen_guest interface. */ #define XEN_ELFNOTE_PAE_MODE 9 /* * The features supported/required by this kernel (string). 
* * The string must consist of a list of feature names (as given in * features.h, without the "XENFEAT_" prefix) separated by '|' * characters. If a feature is required for the kernel to function * then the feature name must be preceded by a '!' character. * * LEGACY: FEATURES */ #define XEN_ELFNOTE_FEATURES 10 /* * The kernel requires the symbol table to be loaded (string = "yes" or "no") * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence * of this string as a boolean flag rather than requiring "yes" or * "no". */ #define XEN_ELFNOTE_BSD_SYMTAB 11 /* * The lowest address the hypervisor hole can begin at (numeric). * * This must not be set higher than HYPERVISOR_VIRT_START. Its presence * also indicates to the hypervisor that the kernel can deal with the * hole starting at a higher address. */ #define XEN_ELFNOTE_HV_START_LOW 12 /* * List of maddr_t-sized mask/value pairs describing how to recognize * (non-present) L1 page table entries carrying valid MFNs (numeric). */ #define XEN_ELFNOTE_L1_MFN_VALID 13 /* * Whether or not the guest supports cooperative suspend cancellation. * This is a numeric value. * * Default is 0 */ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 /* * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using * large pages, despite such otherwise not being available to guests. * The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) */ #define XEN_ELFNOTE_INIT_P2M 15 /* * Whether or not the guest can deal with being passed an initrd not * mapped through its initial page tables. */ #define XEN_ELFNOTE_MOD_START_PFN 16 /* * The features supported by this kernel (numeric). 
* * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a * kernel to specify support for features that older hypervisors don't * know about. The set of features 4.2 and newer hypervisors will * consider supported by the kernel is the combination of the sets * specified through this and the string note. * * LEGACY: FEATURES */ #define XEN_ELFNOTE_SUPPORTED_FEATURES 17 /* * Physical entry point into the kernel. * * 32bit entry point into the kernel. When requested to launch the * guest kernel in a HVM container, Xen will use this entry point to * launch the guest in 32bit protected mode with paging disabled. * Ignored otherwise. */ #define XEN_ELFNOTE_PHYS32_ENTRY 18 /* * The number of the highest elfnote defined. */ #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ PK ! A�K K xen/interface/features.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * features.h * * Feature flags, reported by XENVER_get_features. * * Copyright (c) 2006, Keir Fraser <keir@xensource.com> */ #ifndef __XEN_PUBLIC_FEATURES_H__ #define __XEN_PUBLIC_FEATURES_H__ /* * If set, the guest does not need to write-protect its pagetables, and can * update them via direct writes. */ #define XENFEAT_writable_page_tables 0 /* * If set, the guest does not need to write-protect its segment descriptor * tables, and can update them via direct writes. */ #define XENFEAT_writable_descriptor_tables 1 /* * If set, translation between the guest's 'pseudo-physical' address space * and the host's machine address space are handled by the hypervisor. In this * mode the guest does not need to perform phys-to/from-machine translations * when performing page table operations. */ #define XENFEAT_auto_translated_physmap 2 /* If set, the guest is running in supervisor mode (e.g., x86 ring 0). 
*/ #define XENFEAT_supervisor_mode_kernel 3 /* * If set, the guest does not need to allocate x86 PAE page directories * below 4GB. This flag is usually implied by auto_translated_physmap. */ #define XENFEAT_pae_pgdir_above_4gb 4 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 /* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ #define XENFEAT_highmem_assist 6 /* * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel * available pte bits. */ #define XENFEAT_gnttab_map_avail_bits 7 /* x86: Does this Xen host support the HVM callback vector type? */ #define XENFEAT_hvm_callback_vector 8 /* x86: pvclock algorithm is safe to use on HVM */ #define XENFEAT_hvm_safe_pvclock 9 /* x86: pirq can be used by HVM guests */ #define XENFEAT_hvm_pirqs 10 /* operation as Dom0 is supported */ #define XENFEAT_dom0 11 /* Xen also maps grant references at pfn = mfn. * This feature flag is deprecated and should not be used. #define XENFEAT_grant_map_identity 12 */ /* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ #define XENFEAT_memory_op_vnode_supported 13 /* arm: Hypervisor supports ARM SMC calling convention. */ #define XENFEAT_ARM_SMCCC_supported 14 /* * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP * must be located in lower 1MB, as required by ACPI Specification for IA-PC * systems. * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains * the "linux" string. */ #define XENFEAT_linux_rsdp_unrestricted 15 /* * A direct-mapped (or 1:1 mapped) domain is a domain for which its * local pages have gfn == mfn. If a domain is direct-mapped, * XENFEAT_direct_mapped is set; otherwise XENFEAT_not_direct_mapped * is set. * * If neither flag is set (e.g. 
older Xen releases) the assumptions are: * - not auto_translated domains (x86 only) are always direct-mapped * - on x86, auto_translated domains are not direct-mapped * - on ARM, Dom0 is direct-mapped, DomUs are not */ #define XENFEAT_not_direct_mapped 16 #define XENFEAT_direct_mapped 17 #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ PK ! ��9� � xen/interface/physdev.hnu �[��� /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_PHYSDEV_H__ #define __XEN_PUBLIC_PHYSDEV_H__ /* * Prototype for this hypercall is: * int physdev_op(int cmd, void *args) * @cmd == PHYSDEVOP_??? (physdev operation). * @args == Operation-specific extra arguments (NULL if none). */ /* * Notify end-of-interrupt (EOI) for the specified IRQ. * @arg == pointer to physdev_eoi structure. */ #define PHYSDEVOP_eoi 12 struct physdev_eoi { /* IN */ uint32_t irq; }; /* * Register a shared page for the hypervisor to indicate whether the guest * must issue PHYSDEVOP_eoi. 
The semantics of PHYSDEVOP_eoi change slightly * once the guest used this function in that the associated event channel * will automatically get unmasked. The page registered is used as a bit * array indexed by Xen's PIRQ value. */ #define PHYSDEVOP_pirq_eoi_gmfn_v1 17 /* * Register a shared page for the hypervisor to indicate whether the * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by * Xen's PIRQ value. */ #define PHYSDEVOP_pirq_eoi_gmfn_v2 28 struct physdev_pirq_eoi_gmfn { /* IN */ xen_ulong_t gmfn; }; /* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. */ #define PHYSDEVOP_irq_status_query 5 struct physdev_irq_status_query { /* IN */ uint32_t irq; /* OUT */ uint32_t flags; /* XENIRQSTAT_* */ }; /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ #define _XENIRQSTAT_needs_eoi (0) #define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) /* IRQ shared by multiple guests? */ #define _XENIRQSTAT_shared (1) #define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) /* * Set the current VCPU's I/O privilege level. * @arg == pointer to physdev_set_iopl structure. */ #define PHYSDEVOP_set_iopl 6 struct physdev_set_iopl { /* IN */ uint32_t iopl; }; /* * Set the current VCPU's I/O-port permissions bitmap. * @arg == pointer to physdev_set_iobitmap structure. */ #define PHYSDEVOP_set_iobitmap 7 struct physdev_set_iobitmap { /* IN */ uint8_t * bitmap; uint32_t nr_ports; }; /* * Read or write an IO-APIC register. * @arg == pointer to physdev_apic structure. */ #define PHYSDEVOP_apic_read 8 #define PHYSDEVOP_apic_write 9 struct physdev_apic { /* IN */ unsigned long apic_physbase; uint32_t reg; /* IN or OUT */ uint32_t value; }; /* * Allocate or free a physical upcall vector for the specified IRQ line. * @arg == pointer to physdev_irq structure. 
*/ #define PHYSDEVOP_alloc_irq_vector 10 #define PHYSDEVOP_free_irq_vector 11 struct physdev_irq { /* IN */ uint32_t irq; /* IN or OUT */ uint32_t vector; }; #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 #define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define MAP_PIRQ_TYPE_MULTI_MSI 0x4 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { domid_t domid; /* IN */ int type; /* IN */ int index; /* IN or OUT */ int pirq; /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */ int bus; /* IN */ int devfn; /* IN * - For MSI-X contains entry number. * - For MSI with ..._MULTI_MSI contains number of vectors. * OUT (..._MULTI_MSI only) * - Number of vectors allocated. */ int entry_nr; /* IN */ uint64_t table_base; }; #define PHYSDEVOP_unmap_pirq 14 struct physdev_unmap_pirq { domid_t domid; /* IN */ int pirq; }; #define PHYSDEVOP_manage_pci_add 15 #define PHYSDEVOP_manage_pci_remove 16 struct physdev_manage_pci { /* IN */ uint8_t bus; uint8_t devfn; }; #define PHYSDEVOP_restore_msi 19 struct physdev_restore_msi { /* IN */ uint8_t bus; uint8_t devfn; }; #define PHYSDEVOP_manage_pci_add_ext 20 struct physdev_manage_pci_ext { /* IN */ uint8_t bus; uint8_t devfn; unsigned is_extfn; unsigned is_virtfn; struct { uint8_t bus; uint8_t devfn; } physfn; }; /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. 
*/ struct physdev_op { uint32_t cmd; union { struct physdev_irq_status_query irq_status_query; struct physdev_set_iopl set_iopl; struct physdev_set_iobitmap set_iobitmap; struct physdev_apic apic_op; struct physdev_irq irq_op; } u; }; #define PHYSDEVOP_setup_gsi 21 struct physdev_setup_gsi { int gsi; /* IN */ uint8_t triggering; /* IN */ uint8_t polarity; /* IN */ }; #define PHYSDEVOP_get_nr_pirqs 22 struct physdev_nr_pirqs { /* OUT */ uint32_t nr_pirqs; }; /* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI * the hypercall returns a free pirq */ #define PHYSDEVOP_get_free_pirq 23 struct physdev_get_free_pirq { /* IN */ int type; /* OUT */ uint32_t pirq; }; #define XEN_PCI_DEV_EXTFN 0x1 #define XEN_PCI_DEV_VIRTFN 0x2 #define XEN_PCI_DEV_PXM 0x4 #define XEN_PCI_MMCFG_RESERVED 0x1 #define PHYSDEVOP_pci_mmcfg_reserved 24 struct physdev_pci_mmcfg_reserved { uint64_t address; uint16_t segment; uint8_t start_bus; uint8_t end_bus; uint32_t flags; }; #define PHYSDEVOP_pci_device_add 25 struct physdev_pci_device_add { /* IN */ uint16_t seg; uint8_t bus; uint8_t devfn; uint32_t flags; struct { uint8_t bus; uint8_t devfn; } physfn; #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L uint32_t optarr[]; #elif defined(__GNUC__) uint32_t optarr[0]; #endif }; #define PHYSDEVOP_pci_device_remove 26 #define PHYSDEVOP_restore_msi_ext 27 /* * Dom0 should use these two to announce MMIO resources assigned to * MSI-X capable devices won't (prepare) or may (release) change. */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 struct physdev_pci_device { /* IN */ uint16_t seg; uint8_t bus; uint8_t devfn; }; #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 #define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 #define PHYSDEVOP_DBGP_BUS_PCI 1 #define PHYSDEVOP_dbgp_op 29 struct physdev_dbgp_op { /* IN */ uint8_t op; uint8_t bus; union { struct physdev_pci_device pci; } u; }; /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** * ** unsupported by newer versions of Xen. ** */ #define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 /* * These all-capitals physdev operation names are superceded by the new names * (defined above) since interface version 0x00030202. */ #define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query #define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl #define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap #define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read #define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write #define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector #define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi #define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ PK ! o � � xen/interface/event_channel.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * event_channel.h * * Event channels between domains. * * Copyright (c) 2003-2004, K A Fraser. */ #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ #include <xen/interface/xen.h> typedef uint32_t evtchn_port_t; DEFINE_GUEST_HANDLE(evtchn_port_t); /* * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as * accepting interdomain bindings from domain <remote_dom>. A fresh port * is allocated in <dom> and returned as <port>. * NOTES: * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. * 2. <rdom> may be DOMID_SELF, allowing loopback connections. */ #define EVTCHNOP_alloc_unbound 6 struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; /* OUT parameters */ evtchn_port_t port; }; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify * a port that is unbound and marked as accepting bindings from the calling * domain. 
A fresh port is allocated in the calling domain and returned as * <local_port>. * NOTES: * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. */ #define EVTCHNOP_bind_interdomain 0 struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; evtchn_port_t remote_port; /* OUT parameters. */ evtchn_port_t local_port; }; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified * vcpu. * NOTES: * 1. A virtual IRQ may be bound to at most one event channel per vcpu. * 2. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ #define EVTCHNOP_bind_virq 1 struct evtchn_bind_virq { /* IN parameters. */ uint32_t virq; uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; /* * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ #define EVTCHNOP_bind_pirq 2 struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 uint32_t flags; /* BIND_PIRQ__* */ /* OUT parameters. */ evtchn_port_t port; }; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. * NOTES: * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ #define EVTCHNOP_bind_ipi 7 struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; /* * EVTCHNOP_close: Close a local event channel <port>. If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ #define EVTCHNOP_close 3 struct evtchn_close { /* IN parameters. */ evtchn_port_t port; }; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is <port>. */ #define EVTCHNOP_send 4 struct evtchn_send { /* IN parameters. 
*/ evtchn_port_t port; }; /* * EVTCHNOP_status: Get the current status of the communication channel which * has an endpoint at <dom, port>. * NOTES: * 1. <dom> may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which <dom> is not DOMID_SELF. */ #define EVTCHNOP_status 5 struct evtchn_status { /* IN parameters */ domid_t dom; evtchn_port_t port; /* OUT parameters */ #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ #define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { struct { domid_t dom; } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; } interdomain; /* EVTCHNSTAT_interdomain */ uint32_t pirq; /* EVTCHNSTAT_pirq */ uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised * the binding. This binding cannot be changed. * 2. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ #define EVTCHNOP_bind_vcpu 8 struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; uint32_t vcpu; }; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ #define EVTCHNOP_unmask 9 struct evtchn_unmask { /* IN parameters. 
*/ evtchn_port_t port; }; /* * EVTCHNOP_reset: Close all event channels associated with specified domain. * NOTES: * 1. <dom> may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. */ #define EVTCHNOP_reset 10 struct evtchn_reset { /* IN parameters. */ domid_t dom; }; typedef struct evtchn_reset evtchn_reset_t; /* * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. */ #define EVTCHNOP_init_control 11 struct evtchn_init_control { /* IN parameters. */ uint64_t control_gfn; uint32_t offset; uint32_t vcpu; /* OUT parameters. */ uint8_t link_bits; uint8_t _pad[7]; }; /* * EVTCHNOP_expand_array: add an additional page to the event array. */ #define EVTCHNOP_expand_array 12 struct evtchn_expand_array { /* IN parameters. */ uint64_t array_gfn; }; /* * EVTCHNOP_set_priority: set the priority for an event channel. */ #define EVTCHNOP_set_priority 13 struct evtchn_set_priority { /* IN parameters. */ evtchn_port_t port; uint32_t priority; }; struct evtchn_op { uint32_t cmd; /* EVTCHNOP_* */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; struct evtchn_bind_virq bind_virq; struct evtchn_bind_pirq bind_pirq; struct evtchn_bind_ipi bind_ipi; struct evtchn_close close; struct evtchn_send send; struct evtchn_status status; struct evtchn_bind_vcpu bind_vcpu; struct evtchn_unmask unmask; } u; }; DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); /* * 2-level ABI */ #define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) /* * FIFO ABI */ /* Events may have priorities from 0 (highest) to 15 (lowest). 
*/ #define EVTCHN_FIFO_PRIORITY_MAX 0 #define EVTCHN_FIFO_PRIORITY_DEFAULT 7 #define EVTCHN_FIFO_PRIORITY_MIN 15 #define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) typedef uint32_t event_word_t; #define EVTCHN_FIFO_PENDING 31 #define EVTCHN_FIFO_MASKED 30 #define EVTCHN_FIFO_LINKED 29 #define EVTCHN_FIFO_BUSY 28 #define EVTCHN_FIFO_LINK_BITS 17 #define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) #define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) struct evtchn_fifo_control_block { uint32_t ready; uint32_t _rsvd; event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; }; #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ PK ! �Ø]! ! xen/interface/hvm/hvm_op.hnu �[��� /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ #include <xen/interface/xen.h> /* Get/set subcommands: the second argument of the hypercall is a * pointer to a xen_hvm_param struct. 
*/ #define HVMOP_set_param 0 #define HVMOP_get_param 1 struct xen_hvm_param { domid_t domid; /* IN */ uint32_t index; /* IN */ uint64_t value; /* IN/OUT */ }; DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); /* Hint from PV drivers for pagetable destruction. */ #define HVMOP_pagetable_dying 9 struct xen_hvm_pagetable_dying { /* Domain with a pagetable about to be destroyed. */ domid_t domid; /* guest physical address of the toplevel pagetable dying */ aligned_u64 gpa; }; typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); enum hvmmem_type_t { HVMMEM_ram_rw, /* Normal read/write guest RAM */ HVMMEM_ram_ro, /* Read-only; writes are discarded */ HVMMEM_mmio_dm, /* Reads and write go to the device model */ }; #define HVMOP_get_mem_type 15 /* Return hvmmem_type_t for the specified pfn. */ struct xen_hvm_get_mem_type { /* Domain to be queried. */ domid_t domid; /* OUT variable. */ uint16_t mem_type; uint16_t pad[2]; /* align next field on 8-byte boundary */ /* IN variable. */ uint64_t pfn; }; DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ PK ! ;��7! ! xen/interface/hvm/hvm_vcpu.hnu �[��� /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2015, Roger Pau Monne <roger.pau@citrix.com> */ #ifndef __XEN_PUBLIC_HVM_HVM_VCPU_H__ #define __XEN_PUBLIC_HVM_HVM_VCPU_H__ #include "../xen.h" struct vcpu_hvm_x86_32 { uint32_t eax; uint32_t ecx; uint32_t edx; uint32_t ebx; uint32_t esp; uint32_t ebp; uint32_t esi; uint32_t edi; uint32_t eip; uint32_t eflags; uint32_t cr0; uint32_t cr3; uint32_t cr4; uint32_t pad1; /* * EFER should only be used to set the NXE bit (if required) * when starting a vCPU in 32bit mode with paging enabled or * to set the LME/LMA bits in order to start the vCPU in * compatibility mode. */ uint64_t efer; uint32_t cs_base; uint32_t ds_base; uint32_t ss_base; uint32_t es_base; uint32_t tr_base; uint32_t cs_limit; uint32_t ds_limit; uint32_t ss_limit; uint32_t es_limit; uint32_t tr_limit; uint16_t cs_ar; uint16_t ds_ar; uint16_t ss_ar; uint16_t es_ar; uint16_t tr_ar; uint16_t pad2[3]; }; /* * The layout of the _ar fields of the segment registers is the * following: * * Bits [0,3]: type (bits 40-43). * Bit 4: s (descriptor type, bit 44). * Bit [5,6]: dpl (descriptor privilege level, bits 45-46). * Bit 7: p (segment-present, bit 47). * Bit 8: avl (available for system software, bit 52). * Bit 9: l (64-bit code segment, bit 53). * Bit 10: db (meaning depends on the segment, bit 54). * Bit 11: g (granularity, bit 55) * Bits [12,15]: unused, must be blank. * * A more complete description of the meaning of this fields can be * obtained from the Intel SDM, Volume 3, section 3.4.5. 
*/ struct vcpu_hvm_x86_64 { uint64_t rax; uint64_t rcx; uint64_t rdx; uint64_t rbx; uint64_t rsp; uint64_t rbp; uint64_t rsi; uint64_t rdi; uint64_t rip; uint64_t rflags; uint64_t cr0; uint64_t cr3; uint64_t cr4; uint64_t efer; /* * Using VCPU_HVM_MODE_64B implies that the vCPU is launched * directly in long mode, so the cached parts of the segment * registers get set to match that environment. * * If the user wants to launch the vCPU in compatibility mode * the 32-bit structure should be used instead. */ }; struct vcpu_hvm_context { #define VCPU_HVM_MODE_32B 0 /* 32bit fields of the structure will be used. */ #define VCPU_HVM_MODE_64B 1 /* 64bit fields of the structure will be used. */ uint32_t mode; uint32_t pad; /* CPU registers. */ union { struct vcpu_hvm_x86_32 x86_32; struct vcpu_hvm_x86_64 x86_64; } cpu_regs; }; typedef struct vcpu_hvm_context vcpu_hvm_context_t; #endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */ PK ! u�|� xen/interface/hvm/params.hnu �[��� /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef __XEN_PUBLIC_HVM_PARAMS_H__ #define __XEN_PUBLIC_HVM_PARAMS_H__ #include <xen/interface/hvm/hvm_op.h> /* * Parameter space for HVMOP_{set,get}_param. */ #define HVM_PARAM_CALLBACK_IRQ 0 /* * How should CPU0 event-channel notifications be delivered? * * If val == 0 then CPU0 event-channel notifications are not delivered. * If val != 0, val[63:56] encodes the type, as follows: */ #define HVM_PARAM_CALLBACK_TYPE_GSI 0 /* * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, * and disables all notifications. */ #define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 /* * val[55:0] is a delivery PCI INTx line: * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] */ #if defined(__i386__) || defined(__x86_64__) #define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 /* * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know * if this delivery method is available. */ #elif defined(__arm__) || defined(__aarch64__) #define HVM_PARAM_CALLBACK_TYPE_PPI 2 /* * val[55:16] needs to be zero. * val[15:8] is interrupt flag of the PPI used by event-channel: * bit 8: the PPI is edge(1) or level(0) triggered * bit 9: the PPI is active low(1) or high(0) * val[7:0] is a PPI number used by event-channel. * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to * the notification is handled by the interrupt controller. 
*/ #endif #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 #define HVM_PARAM_PAE_ENABLED 4 #define HVM_PARAM_IOREQ_PFN 5 #define HVM_PARAM_BUFIOREQ_PFN 6 /* * Set mode for virtual timers (currently x86 only): * delay_for_missed_ticks (default): * Do not advance a vcpu's time beyond the correct delivery time for * interrupts that have been missed due to preemption. Deliver missed * interrupts when the vcpu is rescheduled and advance the vcpu's virtual * time stepwise for each one. * no_delay_for_missed_ticks: * As above, missed interrupts are delivered, but guest time always tracks * wallclock (i.e., real) time while doing so. * no_missed_ticks_pending: * No missed interrupts are held pending. Instead, to ensure ticks are * delivered at some non-zero rate, if we detect missed ticks then the * internal tick alarm is not disabled if the VCPU is preempted during the * next tick period. * one_missed_tick_pending: * Missed interrupts are collapsed together and delivered as one 'late tick'. * Guest time always tracks wallclock (i.e., real) time. */ #define HVM_PARAM_TIMER_MODE 10 #define HVMPTM_delay_for_missed_ticks 0 #define HVMPTM_no_delay_for_missed_ticks 1 #define HVMPTM_no_missed_ticks_pending 2 #define HVMPTM_one_missed_tick_pending 3 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ #define HVM_PARAM_HPET_ENABLED 11 /* Identity-map page directory used by Intel EPT when CR0.PG=0. */ #define HVM_PARAM_IDENT_PT 12 /* Device Model domain, defaults to 0. */ #define HVM_PARAM_DM_DOMAIN 13 /* ACPI S state: currently support S0 and S3 on x86. */ #define HVM_PARAM_ACPI_S_STATE 14 /* TSS used on Intel when CR0.PE=0. 
*/ #define HVM_PARAM_VM86_TSS 15 /* Boolean: Enable aligning all periodic vpts to reduce interrupts */ #define HVM_PARAM_VPT_ALIGN 16 /* Console debug shared memory ring and event channel */ #define HVM_PARAM_CONSOLE_PFN 17 #define HVM_PARAM_CONSOLE_EVTCHN 18 #define HVM_NR_PARAMS 19 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ PK ! }!�� � xen/interface/hvm/start_info.hnu �[��� /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2016, Citrix Systems, Inc. */ #ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ #define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ /* * Start of day structure passed to PVH guests and to HVM guests in %ebx. * * NOTE: nothing will be loaded at physical address 0, so a 0 value in any * of the address fields should be treated as not present. * * 0 +----------------+ * | magic | Contains the magic value XEN_HVM_START_MAGIC_VALUE * | | ("xEn3" with the 0x80 bit of the "E" set). * 4 +----------------+ * | version | Version of this structure. 
Current version is 1. New * | | versions are guaranteed to be backwards-compatible. * 8 +----------------+ * | flags | SIF_xxx flags. * 12 +----------------+ * | nr_modules | Number of modules passed to the kernel. * 16 +----------------+ * | modlist_paddr | Physical address of an array of modules * | | (layout of the structure below). * 24 +----------------+ * | cmdline_paddr | Physical address of the command line, * | | a zero-terminated ASCII string. * 32 +----------------+ * | rsdp_paddr | Physical address of the RSDP ACPI data structure. * 40 +----------------+ * | memmap_paddr | Physical address of the (optional) memory map. Only * | | present in version 1 and newer of the structure. * 48 +----------------+ * | memmap_entries | Number of entries in the memory map table. Zero * | | if there is no memory map being provided. Only * | | present in version 1 and newer of the structure. * 52 +----------------+ * | reserved | Version 1 and newer only. * 56 +----------------+ * * The layout of each entry in the module structure is the following: * * 0 +----------------+ * | paddr | Physical address of the module. * 8 +----------------+ * | size | Size of the module in bytes. * 16 +----------------+ * | cmdline_paddr | Physical address of the command line, * | | a zero-terminated ASCII string. * 24 +----------------+ * | reserved | * 32 +----------------+ * * The layout of each entry in the memory map table is as follows: * * 0 +----------------+ * | addr | Base address * 8 +----------------+ * | size | Size of mapping in bytes * 16 +----------------+ * | type | Type of mapping as defined between the hypervisor * | | and guest. See XEN_HVM_MEMMAP_TYPE_* values below. * 20 +----------------| * | reserved | * 24 +----------------+ * * The address and sizes are always a 64bit little endian unsigned integer. * * NB: Xen on x86 will always try to place all the data below the 4GiB * boundary. 
* * Version numbers of the hvm_start_info structure have evolved like this: * * Version 0: Initial implementation. * * Version 1: Added the memmap_paddr/memmap_entries fields (plus 4 bytes of * padding) to the end of the hvm_start_info struct. These new * fields can be used to pass a memory map to the guest. The * memory map is optional and so guests that understand version 1 * of the structure must check that memmap_entries is non-zero * before trying to read the memory map. */ #define XEN_HVM_START_MAGIC_VALUE 0x336ec578 /* * The values used in the type field of the memory map table entries are * defined below and match the Address Range Types as defined in the "System * Address Map Interfaces" section of the ACPI Specification. Please refer to * section 15 in version 6.2 of the ACPI spec: http://uefi.org/specifications */ #define XEN_HVM_MEMMAP_TYPE_RAM 1 #define XEN_HVM_MEMMAP_TYPE_RESERVED 2 #define XEN_HVM_MEMMAP_TYPE_ACPI 3 #define XEN_HVM_MEMMAP_TYPE_NVS 4 #define XEN_HVM_MEMMAP_TYPE_UNUSABLE 5 #define XEN_HVM_MEMMAP_TYPE_DISABLED 6 #define XEN_HVM_MEMMAP_TYPE_PMEM 7 /* * C representation of the x86/HVM start info layout. * * The canonical definition of this layout is above, this is just a way to * represent the layout described there using C types. */ struct hvm_start_info { uint32_t magic; /* Contains the magic value 0x336ec578 */ /* ("xEn3" with the 0x80 bit of the "E" set).*/ uint32_t version; /* Version of this structure. */ uint32_t flags; /* SIF_xxx flags. */ uint32_t nr_modules; /* Number of modules passed to the kernel. */ uint64_t modlist_paddr; /* Physical address of an array of */ /* hvm_modlist_entry. */ uint64_t cmdline_paddr; /* Physical address of the command line. */ uint64_t rsdp_paddr; /* Physical address of the RSDP ACPI data */ /* structure. */ /* All following fields only present in version 1 and newer */ uint64_t memmap_paddr; /* Physical address of an array of */ /* hvm_memmap_table_entry. 
*/ uint32_t memmap_entries; /* Number of entries in the memmap table. */ /* Value will be zero if there is no memory */ /* map being provided. */ uint32_t reserved; /* Must be zero. */ }; struct hvm_modlist_entry { uint64_t paddr; /* Physical address of the module. */ uint64_t size; /* Size of the module in bytes. */ uint64_t cmdline_paddr; /* Physical address of the command line. */ uint64_t reserved; }; struct hvm_memmap_table_entry { uint64_t addr; /* Base address of the memory region */ uint64_t size; /* Size of the memory region in bytes */ uint32_t type; /* Mapping type */ uint32_t reserved; /* Must be zero for Version 1. */ }; #endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */ PK ! �8�ZD D xen/interface/hvm/dm_op.hnu �[��� /* * Copyright (c) 2016, Citrix Systems Inc * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #ifndef __XEN_PUBLIC_HVM_DM_OP_H__ #define __XEN_PUBLIC_HVM_DM_OP_H__ struct xen_dm_op_buf { GUEST_HANDLE(void) h; xen_ulong_t size; }; DEFINE_GUEST_HANDLE_STRUCT(xen_dm_op_buf); #endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */ PK ! UU&-�Q �Q xen/interface/grant_table.hnu �[��� /****************************************************************************** * grant_table.h * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ #define __XEN_PUBLIC_GRANT_TABLE_H__ #include <xen/interface/xen.h> /*********************************** * GRANT TABLE REPRESENTATION */ /* Some rough guidelines on accessing and updating grant-table entries * in a concurrency-safe manner. For more information, Linux contains a * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). * * NB. 
WMB is a no-op on current-generation x86 processors. However, a * compiler barrier will still be required. * * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: * GTF_permit_access: Frame to which access is permitted. * GTF_accept_transfer: Pseudo-phys frame slot being filled by new * frame, or zero if none. * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. * * Invalidating an unused GTF_permit_access entry: * 1. flags = ent->flags. * 2. Observe that !(flags & (GTF_reading|GTF_writing)). * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). * NB. No need for WMB as reuse of entry is control-dependent on success of * step 3, and all architectures guarantee ordering of ctrl-dep writes. * * Invalidating an in-use GTF_permit_access entry: * This cannot be done directly. Request assistance from the domain controller * which can set a timeout on the use of a grant entry and take necessary * action. (NB. This is not yet implemented!). * * Invalidating an unused GTF_accept_transfer entry: * 1. flags = ent->flags. * 2. Observe that !(flags & GTF_transfer_committed). [*] * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). * NB. No need for WMB as reuse of entry is control-dependent on success of * step 3, and all architectures guarantee ordering of ctrl-dep writes. * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. * The guest must /not/ modify the grant entry until the address of the * transferred frame is written. It is safe for the guest to spin waiting * for this to occur (detect by observing GTF_transfer_completed in * ent->flags). * * Invalidating a committed GTF_accept_transfer entry: * 1. Wait for (ent->flags & GTF_transfer_completed). * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. 
* * Changing a GTF_permit_access from read-only to writable: * Use SMP-safe bit-setting instruction. */ /* * Reference to a grant entry in a specified domain's grant table. */ typedef uint32_t grant_ref_t; /* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ /* * Version 1 of the grant table entry structure is maintained purely * for backwards compatibility. New guests should use version 2. */ struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ domid_t domid; /* * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] */ uint32_t frame; }; /* * Type of grant entry. * GTF_invalid: This grant entry grants no privileges. * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. * GTF_transitive: Allow @domid to transitively access a subrange of * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) #define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* * Subflags for GTF_permit_access. * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] * GTF_sub_page: Grant access to only a subrange of the page. @domid * will only be allowed to copy from the grant, and not * map it. 
[GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) #define _GTF_reading (3) #define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) #define GTF_writing (1U<<_GTF_writing) #define _GTF_sub_page (8) #define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: * GTF_transfer_committed: Xen sets this flag to indicate that it is committed * to transferring ownership of a page frame. When a guest sees this flag * it must /not/ modify the grant entry until GTF_transfer_completed is * set by Xen. * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag * after reading GTF_transfer_committed. Xen will always write the frame * address, followed by ORing this flag, in a timely manner. */ #define _GTF_transfer_committed (2) #define GTF_transfer_committed (1U<<_GTF_transfer_committed) #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) /* * Version 2 grant table entries. These fulfil the same role as * version 1 entries, but can represent more complicated operations. * Any given domain will have either a version 1 or a version 2 table, * and every entry in the table will be the same version. * * The interface by which domains use grant references does not depend * on the grant table version in use by the other domain. */ /* * Version 1 and version 2 grant entries share a common prefix. The * fields of the prefix are documented as part of struct * grant_entry_v1. */ struct grant_entry_header { uint16_t flags; domid_t domid; }; /* * Version 2 of the grant entry structure, here is a union because three * different types are suppotted: full_page, sub_page and transitive. */ union grant_entry_v2 { struct grant_entry_header hdr; /* * This member is used for V1-style full page grants, where either: * * -- hdr.type is GTF_accept_transfer, or * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. 
* * In that case, the frame field has the same semantics as the * field of the same name in the V1 entry structure. */ struct { struct grant_entry_header hdr; uint32_t pad0; uint64_t frame; } full_page; /* * If the grant type is GTF_grant_access and GTF_sub_page is set, * @domid is allowed to access bytes [@page_off,@page_off+@length) * in frame @frame. */ struct { struct grant_entry_header hdr; uint16_t page_off; uint16_t length; uint64_t frame; } sub_page; /* * If the grant is GTF_transitive, @domid is allowed to use the * grant @gref in domain @trans_domid, as if it was the local * domain. Obviously, the transitive access must be compatible * with the original grant. */ struct { struct grant_entry_header hdr; domid_t trans_domid; uint16_t pad0; grant_ref_t gref; } transitive; uint32_t __spacer[4]; /* Pad to a power of two */ }; typedef uint16_t grant_status_t; /*********************************** * GRANT TABLE QUERIES AND USES */ /* * Handle to track a mapping created via a grant reference. */ typedef uint32_t grant_handle_t; /* * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access * by devices and/or host CPUs. If successful, <handle> is a tracking number * that must be presented later to destroy the mapping(s). On error, <handle> * is a negative status code. * NOTES: * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address * via which I/O devices may access the granted frame. * 2. If GNTMAP_host_map is specified then a mapping will be added at * either a host virtual address in the current address space, or at * a PTE at the specified machine address. The type of mapping to * perform is selected through the GNTMAP_contains_pte flag, and the * address is specified in <host_addr>. * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a * host mapping is destroyed by other means then it is *NOT* guaranteed * to be accounted to the correct grant reference! 
*/ #define GNTTABOP_map_grant_ref 0 struct gnttab_map_grant_ref { /* IN parameters. */ uint64_t host_addr; uint32_t flags; /* GNTMAP_* */ grant_ref_t ref; domid_t dom; /* OUT parameters. */ int16_t status; /* GNTST_* */ grant_handle_t handle; uint64_t dev_bus_addr; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that * field is ignored. If non-zero, they must refer to a device/host mapping * that is tracked by <handle> * NOTES: * 1. The call may fail in an undefined manner if either mapping is not * tracked by <handle>. * 3. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ #define GNTTABOP_unmap_grant_ref 1 struct gnttab_unmap_grant_ref { /* IN parameters. */ uint64_t host_addr; uint64_t dev_bus_addr; grant_handle_t handle; /* OUT parameters. */ int16_t status; /* GNTST_* */ }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); /* * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least * <nr_frames> pages. The frame addresses are written to the <frame_list>. * Only <nr_frames> addresses are written, even if the table is larger. * NOTES: * 1. <dom> may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. * 3. Xen may not support more than a single grant-table page per domain. */ #define GNTTABOP_setup_table 2 struct gnttab_setup_table { /* IN parameters. */ domid_t dom; uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ GUEST_HANDLE(xen_pfn_t) frame_list; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the * xen console. Debugging use only. */ #define GNTTABOP_dump_table 3 struct gnttab_dump_table { /* IN parameters. */ domid_t dom; /* OUT parameters. 
*/ int16_t status; /* GNTST_* */ }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); /* * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The * foreign domain has previously registered its interest in the transfer via * <domid, ref>. * * Note that, even if the transfer fails, the specified page no longer belongs * to the calling domain *unless* the error is GNTST_bad_page. */ #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ int16_t status; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); /* * GNTTABOP_copy: Hypervisor based copy * source and destinations can be eithers MFNs or, for foreign domains, * grant references. the foreign domain has to grant read/write access * in its grant table. * * The flags specify what type source and destinations are (either MFN * or grant reference). * * Note that this can also be used to copy data between two domains * via a third party if the source and destination domains had previously * grant appropriate access to their pages to the third party. * * source_offset specifies an offset in the source frame, dest_offset * the offset in the target frame and len specifies the number of * bytes to be copied. */ #define _GNTCOPY_source_gref (0) #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) #define GNTTABOP_copy 5 struct gnttab_copy { /* IN parameters. */ struct { union { grant_ref_t ref; xen_pfn_t gmfn; } u; domid_t domid; uint16_t offset; } source, dest; uint16_t len; uint16_t flags; /* GNTCOPY_* */ /* OUT parameters. */ int16_t status; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); /* * GNTTABOP_query_size: Query the current and maximum sizes of the shared * grant table. * NOTES: * 1. <dom> may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. 
*/ #define GNTTABOP_query_size 6 struct gnttab_query_size { /* IN parameters. */ domid_t dom; /* OUT parameters. */ uint32_t nr_frames; uint32_t max_nr_frames; int16_t status; /* GNTST_* */ }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); /* * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings * tracked by <handle> but atomically replace the page table entry with one * pointing to the machine address under <new_addr>. <new_addr> will be * redirected to the null entry. * NOTES: * 1. The call may fail in an undefined manner if either mapping is not * tracked by <handle>. * 2. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ #define GNTTABOP_unmap_and_replace 7 struct gnttab_unmap_and_replace { /* IN parameters. */ uint64_t host_addr; uint64_t new_addr; grant_handle_t handle; /* OUT parameters. */ int16_t status; /* GNTST_* */ }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); /* * GNTTABOP_set_version: Request a particular version of the grant * table shared table structure. This operation can only be performed * once in any given domain. It must be performed before any grants * are activated; otherwise, the domain will be stuck with version 1. * The only defined versions are 1 and 2. */ #define GNTTABOP_set_version 8 struct gnttab_set_version { /* IN parameters */ uint32_t version; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); /* * GNTTABOP_get_status_frames: Get the list of frames used to store grant * status for <dom>. In grant format version 2, the status is separated * from the other shared grant fields to allow more efficient synchronization * using barriers instead of atomic cmpexch operations. * <nr_frames> specify the size of vector <frame_list>. * The frame addresses are returned in the <frame_list>. * Only <nr_frames> addresses are returned, even if the table is larger. * NOTES: * 1. <dom> may be specified as DOMID_SELF. * 2. 
Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. */ #define GNTTABOP_get_status_frames 9 struct gnttab_get_status_frames { /* IN parameters. */ uint32_t nr_frames; domid_t dom; /* OUT parameters. */ int16_t status; /* GNTST_* */ GUEST_HANDLE(uint64_t) frame_list; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); /* * GNTTABOP_get_version: Get the grant table version which is in * effect for domain <dom>. */ #define GNTTABOP_get_version 10 struct gnttab_get_version { /* IN parameters */ domid_t dom; uint16_t pad; /* OUT parameters */ uint32_t version; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); /* * Issue one or more cache maintenance operations on a portion of a * page granted to the calling domain by a foreign domain. */ #define GNTTABOP_cache_flush 12 struct gnttab_cache_flush { union { uint64_t dev_bus_addr; grant_ref_t ref; } a; uint16_t offset; /* offset from start of grant */ uint16_t length; /* size within the grant */ #define GNTTAB_CACHE_CLEAN (1<<0) #define GNTTAB_CACHE_INVAL (1<<1) #define GNTTAB_CACHE_SOURCE_GREF (1<<31) uint32_t op; }; DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); /* * Bitfield values for update_pin_status.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) #define GNTMAP_device_map (1<<_GNTMAP_device_map) /* Map the grant entry for access by host CPUs. */ #define _GNTMAP_host_map (1) #define GNTMAP_host_map (1<<_GNTMAP_host_map) /* Accesses to the granted frame will be restricted to read-only access. */ #define _GNTMAP_readonly (2) #define GNTMAP_readonly (1<<_GNTMAP_readonly) /* * GNTMAP_host_map subflag: * 0 => The host mapping is usable only by the guest OS. * 1 => The host mapping is usable by guest OS + current application. */ #define _GNTMAP_application_map (3) #define GNTMAP_application_map (1<<_GNTMAP_application_map) /* * GNTMAP_contains_pte subflag: * 0 => This map request contains a host virtual address. 
* 1 => This map request contains the machine addess of the PTE to update. */ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) /* * Bits to be placed in guest kernel available PTE bits (architecture * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). */ #define _GNTMAP_guest_avail0 (16) #define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) /* * Values for error status returns. All errors are -ve. */ #define GNTST_okay (0) /* Normal return. */ #define GNTST_general_error (-1) /* General undefined error. */ #define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ #define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ #define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ #define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ #define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ #define GNTST_address_too_big (-11) /* transfer page address too large. */ #define GNTST_eagain (-12) /* Operation not done; try again. */ #define GNTTABOP_error_msgs { \ "okay", \ "undefined error", \ "unrecognised domain id", \ "invalid grant reference", \ "invalid mapping handle", \ "invalid virtual address", \ "invalid device address", \ "no spare translation slot in the I/O MMU", \ "permission denied", \ "bad page", \ "copy arguments cross page boundary", \ "page address size too large", \ "operation not done; try again" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ PK ! 
��G G xen/interface/sched.hnu �[��� /****************************************************************************** * sched.h * * Scheduler state interactions * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser <keir@xensource.com> */ #ifndef __XEN_PUBLIC_SCHED_H__ #define __XEN_PUBLIC_SCHED_H__ #include <xen/interface/event_channel.h> /* * Guest Scheduler Operations * * The SCHEDOP interface provides mechanisms for a guest to interact * with the scheduler, including yield, blocking and shutting itself * down. */ /* * The prototype for this hypercall is: * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) * * @cmd == SCHEDOP_??? (scheduler operation). * @arg == Operation-specific extra argument(s), as described below. * ... == Additional Operation-specific extra arguments, described below. 
* * Versions of Xen prior to 3.0.2 provided only the following legacy version * of this hypercall, supporting only the commands yield, block and shutdown: * long sched_op(int cmd, unsigned long arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) * == SHUTDOWN_* code (SCHEDOP_shutdown) * * This legacy version is available to new guests as: * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) */ /* * Voluntarily yield the CPU. * @arg == NULL. */ #define SCHEDOP_yield 0 /* * Block execution of this VCPU until an event is received for processing. * If called with event upcalls masked, this operation will atomically * reenable event delivery and check for pending events before blocking the * VCPU. This avoids a "wakeup waiting" race. * @arg == NULL. */ #define SCHEDOP_block 1 /* * Halt execution of this domain (all VCPUs) and notify the system controller. * @arg == pointer to sched_shutdown structure. * * If the sched_shutdown_t reason is SHUTDOWN_suspend then * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN * of the guest's start info page. RDX/EDX is the third hypercall * argument. * * In addition, which reason is SHUTDOWN_suspend this hypercall * returns 1 if suspend was cancelled or the domain was merely * checkpointed, and 0 if it is resuming in a new domain. */ #define SCHEDOP_shutdown 2 /* * Poll a set of event-channel ports. Return when one or more are pending. An * optional timeout may be specified. * @arg == pointer to sched_poll structure. */ #define SCHEDOP_poll 3 /* * Declare a shutdown for another domain. The main use of this function is * in interpreting shutdown requests and reasons for fully-virtualized * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. * @arg == pointer to sched_remote_shutdown structure. 
*/ #define SCHEDOP_remote_shutdown 4 /* * Latch a shutdown code, so that when the domain later shuts down it * reports this code to the control tools. * @arg == sched_shutdown, as for SCHEDOP_shutdown. */ #define SCHEDOP_shutdown_code 5 /* * Setup, poke and destroy a domain watchdog timer. * @arg == pointer to sched_watchdog structure. * With id == 0, setup a domain watchdog timer to cause domain shutdown * after timeout, returns watchdog id. * With id != 0 and timeout == 0, destroy domain watchdog timer. * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. */ #define SCHEDOP_watchdog 6 /* * Override the current vcpu affinity by pinning it to one physical cpu or * undo this override restoring the previous affinity. * @arg == pointer to sched_pin_override structure. * * A negative pcpu value will undo a previous pin override and restore the * previous cpu affinity. * This call is allowed for the hardware domain only and requires the cpu * to be part of the domain's cpupool. */ #define SCHEDOP_pin_override 7 struct sched_shutdown { unsigned int reason; /* SHUTDOWN_* => shutdown reason */ }; DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); struct sched_poll { GUEST_HANDLE(evtchn_port_t) ports; unsigned int nr_ports; uint64_t timeout; }; DEFINE_GUEST_HANDLE_STRUCT(sched_poll); struct sched_remote_shutdown { domid_t domain_id; /* Remote domain ID */ unsigned int reason; /* SHUTDOWN_* => shutdown reason */ }; DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown); struct sched_watchdog { uint32_t id; /* watchdog ID */ uint32_t timeout; /* timeout */ }; DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog); struct sched_pin_override { int32_t pcpu; }; DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override); /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. */ #define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. 
*/ #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ #define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ /* * Domain asked to perform 'soft reset' for it. The expected behavior is to * reset internal Xen state for the domain returning it to the point where it * was created but leaving the domain's memory contents and vCPU contexts * intact. This will allow the domain to start over and set up all Xen specific * interfaces again. */ #define SHUTDOWN_soft_reset 5 #define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */ #endif /* __XEN_PUBLIC_SCHED_H__ */ PK ! �:�* �* xen/interface/memory.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * memory.h * * Memory reservation and information. * * Copyright (c) 2005, Keir Fraser <keir@xensource.com> */ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ #include <linux/spinlock.h> /* * Increase or decrease the specified domain's memory reservation. Returns a * -ve errcode on failure, or the # extents successfully allocated or freed. * arg == addr of struct xen_memory_reservation. */ #define XENMEM_increase_reservation 0 #define XENMEM_decrease_reservation 1 #define XENMEM_populate_physmap 6 struct xen_memory_reservation { /* * XENMEM_increase_reservation: * OUT: MFN (*not* GMFN) bases of extents that were allocated * XENMEM_decrease_reservation: * IN: GMFN bases of extents to free * XENMEM_populate_physmap: * IN: GPFN bases of extents to populate with memory * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). 
*/ xen_ulong_t nr_extents; unsigned int extent_order; /* * Maximum # bits addressable by the user of the allocated region (e.g., * I/O devices often have a 32-bit limitation even in 64-bit systems). If * zero then the user has no addressing restriction. * This field is not used by XENMEM_decrease_reservation. */ unsigned int address_bits; /* * Domain whose reservation is being changed. * Unprivileged domains can specify only DOMID_SELF. */ domid_t domid; }; DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * An atomic exchange of memory pages. If return code is zero then * @out.extent_list provides GMFNs of the newly-allocated memory. * Returns zero on complete success, otherwise a negative error code. * On complete success then always @nr_exchanged == @in.nr_extents. * On partial success @nr_exchanged indicates how much work was done. */ #define XENMEM_exchange 11 struct xen_memory_exchange { /* * [IN] Details of memory extents to be exchanged (GMFN bases). * Note that @in.address_bits is ignored and unused. */ struct xen_memory_reservation in; /* * [IN/OUT] Details of new memory extents. * We require that: * 1. @in.domid == @out.domid * 2. @in.nr_extents << @in.extent_order == * @out.nr_extents << @out.extent_order * 3. @in.extent_start and @out.extent_start lists must not overlap * 4. @out.extent_start lists GPFN bases to be populated * 5. @out.extent_start is overwritten with allocated GMFN bases */ struct xen_memory_reservation out; /* * [OUT] Number of input extents that were successfully exchanged: * 1. The first @nr_exchanged input extents were successfully * deallocated. * 2. The corresponding first entries in the output extent list correctly * indicate the GMFNs that were successfully exchanged. * 3. All other input and output extents are untouched. * 4. If not all input exents are exchanged then the return code of this * command will be non-zero. * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! 
*/ xen_ulong_t nr_exchanged; }; DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); /* * Returns the maximum machine frame number of mapped RAM in this system. * This command always succeeds (it never returns an error code). * arg == NULL. */ #define XENMEM_maximum_ram_page 2 /* * Returns the current or maximum memory reservation, in pages, of the * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. * arg == addr of domid_t. */ #define XENMEM_current_reservation 3 #define XENMEM_maximum_reservation 4 /* * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys * mapping table. Architectures which do not have a m2p table do not implement * this command. * arg == addr of xen_machphys_mfn_list_t. */ #define XENMEM_machphys_mfn_list 5 struct xen_machphys_mfn_list { /* * Size of the 'extent_start' array. Fewer entries will be filled if the * machphys table is smaller than max_extents * 2MB. */ unsigned int max_extents; /* * Pointer to buffer to fill with list of extent starts. If there are * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller * than 'max_extents' if the machphys table is smaller than max_e * 2MB. */ unsigned int nr_extents; }; DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Returns the location in virtual address space of the machine_to_phys * mapping table. Architectures which do not have a m2p table, or which do not * map it by default into guest address space, do not implement this command. * arg == addr of xen_machphys_mapping_t. */ #define XENMEM_machphys_mapping 12 struct xen_machphys_mapping { xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. 
*/ }; DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ #define XENMAPSPACE_gmfn 2 /* GMFN */ #define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, * XENMEM_add_to_physmap_range only. */ #define XENMAPSPACE_dev_mmio 5 /* device mmio region */ /* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. */ #define XENMEM_add_to_physmap 7 struct xen_add_to_physmap { /* Which domain to change the mapping for. */ domid_t domid; /* Number of pages to go through for gmfn_range */ uint16_t size; /* Source mapping space. */ unsigned int space; /* Index into source mapping space. */ xen_ulong_t idx; /* GPFN where the source mapping page should appear. */ xen_pfn_t gpfn; }; DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /*** REMOVED ***/ /*#define XENMEM_translate_gpfn_list 8*/ #define XENMEM_add_to_physmap_range 23 struct xen_add_to_physmap_range { /* IN */ /* Which domain to change the mapping for. */ domid_t domid; uint16_t space; /* => enum phys_map_space */ /* Number of pages to go through */ uint16_t size; domid_t foreign_domid; /* IFF gmfn_foreign */ /* Indexes into space being mapped. */ GUEST_HANDLE(xen_ulong_t) idxs; /* GPFN in domid where the source mapping page should appear. */ GUEST_HANDLE(xen_pfn_t) gpfns; /* OUT */ /* Per index error code. */ GUEST_HANDLE(int) errs; }; DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range); /* * Returns the pseudo-physical memory map as it was when the domain * was started (specified by XENMEM_set_memory_map). * arg == addr of struct xen_memory_map. */ #define XENMEM_memory_map 9 struct xen_memory_map { /* * On call the number of entries which can be stored in buffer. On * return the number of entries which have been stored in * buffer. 
*/ unsigned int nr_entries; /* * Entries in the buffer are in the same format as returned by the * BIOS INT 0x15 EAX=0xE820 call. */ GUEST_HANDLE(void) buffer; }; DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); /* * Returns the real physical memory map. Passes the same structure as * XENMEM_memory_map. * arg == addr of struct xen_memory_map. */ #define XENMEM_machine_memory_map 10 /* * Unmaps the page appearing at a particular GPFN from the specified guest's * pseudophysical address space. * arg == addr of xen_remove_from_physmap_t. */ #define XENMEM_remove_from_physmap 15 struct xen_remove_from_physmap { /* Which domain to change the mapping for. */ domid_t domid; /* GPFN of the current mapping of the page. */ xen_pfn_t gpfn; }; DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); /* * Get the pages for a particular guest resource, so that they can be * mapped directly by a tools domain. */ #define XENMEM_acquire_resource 28 struct xen_mem_acquire_resource { /* IN - The domain whose resource is to be mapped */ domid_t domid; /* IN - the type of resource */ uint16_t type; #define XENMEM_resource_ioreq_server 0 #define XENMEM_resource_grant_table 1 /* * IN - a type-specific resource identifier, which must be zero * unless stated otherwise. * * type == XENMEM_resource_ioreq_server -> id == ioreq server id * type == XENMEM_resource_grant_table -> id defined below */ uint32_t id; #define XENMEM_resource_grant_table_id_shared 0 #define XENMEM_resource_grant_table_id_status 1 /* IN/OUT - As an IN parameter number of frames of the resource * to be mapped. However, if the specified value is 0 and * frame_list is NULL then this field will be set to the * maximum value supported by the implementation on return. */ uint32_t nr_frames; /* * OUT - Must be zero on entry. On return this may contain a bitwise * OR of the following values. 
*/ uint32_t flags; /* The resource pages have been assigned to the calling domain */ #define _XENMEM_rsrc_acq_caller_owned 0 #define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned) /* * IN - the index of the initial frame to be mapped. This parameter * is ignored if nr_frames is 0. */ uint64_t frame; #define XENMEM_resource_ioreq_server_frame_bufioreq 0 #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) /* * IN/OUT - If the tools domain is PV then, upon return, frame_list * will be populated with the MFNs of the resource. * If the tools domain is HVM then it is expected that, on * entry, frame_list will be populated with a list of GFNs * that will be mapped to the MFNs of the resource. * If -EIO is returned then the frame_list has only been * partially mapped and it is up to the caller to unmap all * the GFNs. * This parameter may be NULL if nr_frames is 0. */ GUEST_HANDLE(xen_pfn_t) frame_list; }; DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); #endif /* __XEN_PUBLIC_MEMORY_H__ */ PK ! ���! �! xen/interface/vcpu.hnu �[��� /****************************************************************************** * vcpu.h * * VCPU initialisation, query, and hotplug. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2005, Keir Fraser <keir@xensource.com> */ #ifndef __XEN_PUBLIC_VCPU_H__ #define __XEN_PUBLIC_VCPU_H__ /* * Prototype for this hypercall is: * int vcpu_op(int cmd, int vcpuid, void *extra_args) * @cmd == VCPUOP_??? (VCPU operation). * @vcpuid == VCPU to operate on. * @extra_args == Operation-specific extra arguments (NULL if none). */ /* * Initialise a VCPU. Each VCPU can be initialised only once. A * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. * * @extra_arg == pointer to vcpu_guest_context structure containing initial * state for the VCPU. */ #define VCPUOP_initialise 0 /* * Bring up a VCPU. This makes the VCPU runnable. This operation will fail * if the VCPU has not been initialised (VCPUOP_initialise). */ #define VCPUOP_up 1 /* * Bring down a VCPU (i.e., make it non-runnable). * There are a few caveats that callers should observe: * 1. This operation may return, and VCPU_is_up may return false, before the * VCPU stops running (i.e., the command is asynchronous). It is a good * idea to ensure that the VCPU has entered a non-critical loop before * bringing it down. Alternatively, this operation is guaranteed * synchronous if invoked by the VCPU itself. * 2. After a VCPU is initialised, there is currently no way to drop all its * references to domain memory. Even a VCPU that is down still holds * memory references via its pagetable base pointer and GDT. It is good * practise to move a VCPU onto an 'idle' or default page table, LDT and * GDT before bringing it down. */ #define VCPUOP_down 2 /* Returns 1 if the given VCPU is up. */ #define VCPUOP_is_up 3 /* * Return information about the state and running time of a VCPU. 
* @extra_arg == pointer to vcpu_runstate_info structure. */ #define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { /* VCPU's current state (RUNSTATE_*). */ int state; /* When was current state entered (system time, ns)? */ uint64_t state_entry_time; /* * Update indicator set in state_entry_time: * When activated via VMASST_TYPE_runstate_update_flag, set during * updates in guest memory mapped copy of vcpu_runstate_info. */ #define XEN_RUNSTATE_UPDATE (1ULL << 63) /* * Time spent in each RUNSTATE_* (ns). The sum of these times is * guaranteed not to drift from system time. */ uint64_t time[4]; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); /* VCPU is currently running on a physical CPU. */ #define RUNSTATE_running 0 /* VCPU is runnable, but not currently scheduled on any physical CPU. */ #define RUNSTATE_runnable 1 /* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ #define RUNSTATE_blocked 2 /* * VCPU is not runnable, but it is not blocked. * This is a 'catch all' state for things like hotplug and pauses by the * system administrator (or for critical sections in the hypervisor). * RUNSTATE_blocked dominates this state (it is the preferred state). */ #define RUNSTATE_offline 3 /* * Register a shared memory area from which the guest may obtain its own * runstate information without needing to execute a hypercall. * Notes: * 1. The registered address may be virtual or physical, depending on the * platform. The virtual address should be registered on x86 systems. * 2. Only one shared area may be registered per VCPU. The shared area is * updated by the hypervisor each time the VCPU is scheduled. Thus * runstate.state will always be RUNSTATE_running and * runstate.state_entry_time will indicate the system time at which the * VCPU was last scheduled to run. * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. 
*/ #define VCPUOP_register_runstate_memory_area 5 struct vcpu_register_runstate_memory_area { union { GUEST_HANDLE(vcpu_runstate_info) h; struct vcpu_runstate_info *v; uint64_t p; } addr; }; /* * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer * which can be set via these commands. Periods smaller than one millisecond * may not be supported. */ #define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ #define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ struct vcpu_set_periodic_timer { uint64_t period_ns; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer); /* * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot * timer which can be set via these commands. */ #define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ #define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ struct vcpu_set_singleshot_timer { uint64_t timeout_abs_ns; uint32_t flags; /* VCPU_SSHOTTMR_??? */ }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer); /* Flags to VCPUOP_set_singleshot_timer. */ /* Require the timeout to be in the future (return -ETIME if it's passed). */ #define _VCPU_SSHOTTMR_future (0) #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) /* * Register a memory location in the guest address space for the * vcpu_info structure. This allows the guest to place the vcpu_info * structure in a convenient place, such as in a per-cpu data area. * The pointer need not be page aligned, but the structure must not * cross a page boundary. */ #define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ struct vcpu_register_vcpu_info { uint64_t mfn; /* mfn of page to place vcpu_info */ uint32_t offset; /* offset within page */ uint32_t rsvd; /* unused */ }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); /* Send an NMI to the specified VCPU. @extra_arg == NULL. */ #define VCPUOP_send_nmi 11 /* * Get the physical ID information for a pinned vcpu's underlying physical * processor. 
The physical ID informmation is architecture-specific. * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. * This command returns -EINVAL if it is not a valid operation for this VCPU. */ #define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ struct vcpu_get_physid { uint64_t phys_id; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_get_physid); #define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) #define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) /* * Register a memory location to get a secondary copy of the vcpu time * parameters. The master copy still exists as part of the vcpu shared * memory area, and this secondary copy is updated whenever the master copy * is updated (and using the same versioning scheme for synchronisation). * * The intent is that this copy may be mapped (RO) into userspace so * that usermode can compute system time using the time info and the * tsc. Usermode will see an array of vcpu_time_info structures, one * for each vcpu, and choose the right one by an existing mechanism * which allows it to get the current vcpu number (such as via a * segment limit). It can then apply the normal algorithm to compute * system time from the tsc. * * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. */ #define VCPUOP_register_vcpu_time_memory_area 13 DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info); struct vcpu_register_time_memory_area { union { GUEST_HANDLE(vcpu_time_info) h; struct pvclock_vcpu_time_info *v; uint64_t p; } addr; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area); #endif /* __XEN_PUBLIC_VCPU_H__ */ PK ! 
'K�$� � xen/interface/io/protocols.hnu �[��� /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __XEN_PROTOCOLS_H__ #define __XEN_PROTOCOLS_H__ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" #define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" #define XEN_IO_PROTO_ABI_ARM "arm-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 #elif defined(__x86_64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 #elif defined(__powerpc64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 #elif defined(__arm__) || defined(__aarch64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM #else # error arch fixup needed here #endif #endif PK ! �F xen/interface/io/pciif.hnu �[��� /* * PCI Backend/Frontend Common Data Structures & Macros * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Author: Ryan Wilson <hap9@epoch.ncsc.mil> */ #ifndef __XEN_PCI_COMMON_H__ #define __XEN_PCI_COMMON_H__ /* Be sure to bump this number if you change this file */ #define XEN_PCI_MAGIC "7" /* xen_pci_sharedinfo flags */ #define _XEN_PCIF_active (0) #define XEN_PCIF_active (1<<_XEN_PCIF_active) #define _XEN_PCIB_AERHANDLER (1) #define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) #define _XEN_PCIB_active (2) #define XEN_PCIB_active (1<<_XEN_PCIB_active) /* xen_pci_op commands */ #define XEN_PCI_OP_conf_read (0) #define XEN_PCI_OP_conf_write (1) #define XEN_PCI_OP_enable_msi (2) #define XEN_PCI_OP_disable_msi (3) #define XEN_PCI_OP_enable_msix (4) #define XEN_PCI_OP_disable_msix (5) #define XEN_PCI_OP_aer_detected (6) #define XEN_PCI_OP_aer_resume (7) #define XEN_PCI_OP_aer_mmio (8) #define XEN_PCI_OP_aer_slotreset (9) /* xen_pci_op error numbers */ #define XEN_PCI_ERR_success (0) #define XEN_PCI_ERR_dev_not_found (-1) #define XEN_PCI_ERR_invalid_offset (-2) #define XEN_PCI_ERR_access_denied (-3) #define XEN_PCI_ERR_not_implemented (-4) /* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ #define XEN_PCI_ERR_op_failed (-5) /* * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) * Should not exceed 128 */ #define SH_INFO_MAX_VEC 128 struct xen_msix_entry { uint16_t vector; uint16_t entry; }; struct xen_pci_op { /* IN: what action to perform: XEN_PCI_OP_* */ uint32_t cmd; /* OUT: will contain an error number (if any) from errno.h */ int32_t err; /* IN: which device to touch */ uint32_t domain; /* PCI Domain/Segment */ uint32_t bus; uint32_t devfn; /* IN: which configuration registers to touch */ int32_t offset; int32_t size; /* IN/OUT: Contains the result after a READ or the value to WRITE */ uint32_t value; /* IN: Contains extra infor for this operation */ uint32_t info; /*IN: param for msi-x */ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; }; /*used for pcie aer handling*/ struct xen_pcie_aer_op { /* IN: what 
action to perform: XEN_PCI_OP_* */ uint32_t cmd; /*IN/OUT: return aer_op result or carry error_detected state as input*/ int32_t err; /* IN: which device to touch */ uint32_t domain; /* PCI Domain/Segment*/ uint32_t bus; uint32_t devfn; }; struct xen_pci_sharedinfo { /* flags - XEN_PCIF_* */ uint32_t flags; struct xen_pci_op op; struct xen_pcie_aer_op aer_op; }; #endif /* __XEN_PCI_COMMON_H__ */ PK ! �>�i�^ �^ xen/interface/io/kbdif.hnu �[��� /* * kbdif.h -- Xen virtual keyboard/mouse * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> */ #ifndef __XEN_PUBLIC_IO_KBDIF_H__ #define __XEN_PUBLIC_IO_KBDIF_H__ /* ***************************************************************************** * Feature and Parameter Negotiation ***************************************************************************** * * The two halves of a para-virtual driver utilize nodes within * XenStore to communicate capabilities and to negotiate operating parameters. * This section enumerates these nodes which reside in the respective front and * backend portions of XenStore, following XenBus convention. * * All data in XenStore is stored as strings. Nodes specifying numeric * values are encoded in decimal. Integer value ranges listed below are * expressed as fixed sized integer types capable of storing the conversion * of a properly formated node string, without loss of information. * ***************************************************************************** * Backend XenBus Nodes ***************************************************************************** * *---------------------------- Features supported ---------------------------- * * Capable backend advertises supported features by publishing * corresponding entries in XenStore and puts 1 as the value of the entry. * If a feature is not supported then 0 must be set or feature entry omitted. * * feature-disable-keyboard * Values: <uint> * * If there is no need to expose a virtual keyboard device by the * frontend then this must be set to 1. * * feature-disable-pointer * Values: <uint> * * If there is no need to expose a virtual pointer device by the * frontend then this must be set to 1. * * feature-abs-pointer * Values: <uint> * * Backends, which support reporting of absolute coordinates for pointer * device should set this to 1. 
* * feature-multi-touch * Values: <uint> * * Backends, which support reporting of multi-touch events * should set this to 1. * * feature-raw-pointer * Values: <uint> * * Backends, which support reporting raw (unscaled) absolute coordinates * for pointer devices should set this to 1. Raw (unscaled) values have * a range of [0, 0x7fff]. * *----------------------- Device Instance Parameters ------------------------ * * unique-id * Values: <string> * * After device instance initialization it is assigned a unique ID, * so every instance of the frontend can be identified by the backend * by this ID. This can be UUID or such. * *------------------------- Pointer Device Parameters ------------------------ * * width * Values: <uint> * * Maximum X coordinate (width) to be used by the frontend * while reporting input events, pixels, [0; UINT32_MAX]. * * height * Values: <uint> * * Maximum Y coordinate (height) to be used by the frontend * while reporting input events, pixels, [0; UINT32_MAX]. * *----------------------- Multi-touch Device Parameters ---------------------- * * multi-touch-num-contacts * Values: <uint> * * Number of simultaneous touches reported. * * multi-touch-width * Values: <uint> * * Width of the touch area to be used by the frontend * while reporting input events, pixels, [0; UINT32_MAX]. * * multi-touch-height * Values: <uint> * * Height of the touch area to be used by the frontend * while reporting input events, pixels, [0; UINT32_MAX]. * ***************************************************************************** * Frontend XenBus Nodes ***************************************************************************** * *------------------------------ Feature request ----------------------------- * * Capable frontend requests features from backend via setting corresponding * entries to 1 in XenStore. Requests for features not advertised as supported * by the backend have no effect. 
* * request-abs-pointer * Values: <uint> * * Request backend to report absolute pointer coordinates * (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION). * * request-multi-touch * Values: <uint> * * Request backend to report multi-touch events. * * request-raw-pointer * Values: <uint> * * Request backend to report raw unscaled absolute pointer coordinates. * This option is only valid if request-abs-pointer is also set. * Raw unscaled coordinates have the range [0, 0x7fff] * *----------------------- Request Transport Parameters ----------------------- * * event-channel * Values: <uint> * * The identifier of the Xen event channel used to signal activity * in the ring buffer. * * page-gref * Values: <uint> * * The Xen grant reference granting permission for the backend to map * a sole page in a single page sized event ring buffer. * * page-ref * Values: <uint> * * OBSOLETE, not recommended for use. * PFN of the shared page. */ /* * EVENT CODES. */ #define XENKBD_TYPE_MOTION 1 #define XENKBD_TYPE_RESERVED 2 #define XENKBD_TYPE_KEY 3 #define XENKBD_TYPE_POS 4 #define XENKBD_TYPE_MTOUCH 5 /* Multi-touch event sub-codes */ #define XENKBD_MT_EV_DOWN 0 #define XENKBD_MT_EV_UP 1 #define XENKBD_MT_EV_MOTION 2 #define XENKBD_MT_EV_SYN 3 #define XENKBD_MT_EV_SHAPE 4 #define XENKBD_MT_EV_ORIENT 5 /* * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS. 
*/ #define XENKBD_DRIVER_NAME "vkbd" #define XENKBD_FIELD_FEAT_DSBL_KEYBRD "feature-disable-keyboard" #define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer" #define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer" #define XENKBD_FIELD_FEAT_RAW_POINTER "feature-raw-pointer" #define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch" #define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer" #define XENKBD_FIELD_REQ_RAW_POINTER "request-raw-pointer" #define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch" #define XENKBD_FIELD_RING_GREF "page-gref" #define XENKBD_FIELD_EVT_CHANNEL "event-channel" #define XENKBD_FIELD_WIDTH "width" #define XENKBD_FIELD_HEIGHT "height" #define XENKBD_FIELD_MT_WIDTH "multi-touch-width" #define XENKBD_FIELD_MT_HEIGHT "multi-touch-height" #define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts" #define XENKBD_FIELD_UNIQUE_ID "unique-id" /* OBSOLETE, not recommended for use */ #define XENKBD_FIELD_RING_REF "page-ref" /* ***************************************************************************** * Description of the protocol between frontend and backend driver. ***************************************************************************** * * The two halves of a Para-virtual driver communicate with * each other using a shared page and an event channel. * Shared page contains a ring with event structures. * * All reserved fields in the structures below must be 0. * ***************************************************************************** * Backend to frontend events ***************************************************************************** * * Frontends should ignore unknown in events. * All event packets have the same length (40 octets) * All event packets have common header: * * 0 octet * +-----------------+ * | type | * +-----------------+ * type - uint8_t, event code, XENKBD_TYPE_??? 
* * * Pointer relative movement event * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MOTION | reserved | 4 * +----------------+----------------+----------------+----------------+ * | rel_x | 8 * +----------------+----------------+----------------+----------------+ * | rel_y | 12 * +----------------+----------------+----------------+----------------+ * | rel_z | 16 * +----------------+----------------+----------------+----------------+ * | reserved | 20 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * rel_x - int32_t, relative X motion * rel_y - int32_t, relative Y motion * rel_z - int32_t, relative Z motion (wheel) */ struct xenkbd_motion { uint8_t type; int32_t rel_x; int32_t rel_y; int32_t rel_z; }; /* * Key event (includes pointer buttons) * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_KEY | pressed | reserved | 4 * +----------------+----------------+----------------+----------------+ * | keycode | 8 * +----------------+----------------+----------------+----------------+ * | reserved | 12 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * pressed - uint8_t, 1 if pressed; 0 otherwise * keycode - uint32_t, KEY_* from linux/input.h */ struct xenkbd_key { uint8_t type; uint8_t pressed; uint32_t keycode; }; /* * Pointer absolute position event * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_POS | reserved | 
4 * +----------------+----------------+----------------+----------------+ * | abs_x | 8 * +----------------+----------------+----------------+----------------+ * | abs_y | 12 * +----------------+----------------+----------------+----------------+ * | rel_z | 16 * +----------------+----------------+----------------+----------------+ * | reserved | 20 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * abs_x - int32_t, absolute X position (in FB pixels) * abs_y - int32_t, absolute Y position (in FB pixels) * rel_z - int32_t, relative Z motion (wheel) */ struct xenkbd_position { uint8_t type; int32_t abs_x; int32_t abs_y; int32_t rel_z; }; /* * Multi-touch event and its sub-types * * All multi-touch event packets have common header: * * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | event_type | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_??? * contact_id - unt8_t, ID of the contact * * Touch interactions can consist of one or more contacts. * For each contact, a series of events is generated, starting * with a down event, followed by zero or more motion events, * and ending with an up event. Events relating to the same * contact point can be identified by the ID of the sequence: contact ID. * Contact ID may be reused after XENKBD_MT_EV_UP event and * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range. * * For further information please refer to documentation on Wayland [1], * Linux [2] and Windows [3] multi-touch support. 
* * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.rst * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx * * * Multi-touch down event - sent when a new touch is made: touch is assigned * a unique contact ID, sent with this and consequent events related * to this touch. * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * | abs_x | 12 * +----------------+----------------+----------------+----------------+ * | abs_y | 16 * +----------------+----------------+----------------+----------------+ * | reserved | 20 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * abs_x - int32_t, absolute X position, in pixels * abs_y - int32_t, absolute Y position, in pixels * * Multi-touch contact release event * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * Multi-touch motion event * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | 
_TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * | abs_x | 12 * +----------------+----------------+----------------+----------------+ * | abs_y | 16 * +----------------+----------------+----------------+----------------+ * | reserved | 20 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * abs_x - int32_t, absolute X position, in pixels, * abs_y - int32_t, absolute Y position, in pixels, * * Multi-touch input synchronization event - shows end of a set of events * which logically belong together. * 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * Multi-touch shape event - touch point's shape has changed its shape. * Shape is approximated by an ellipse through the major and minor axis * lengths: major is the longer diameter of the ellipse and minor is the * shorter one. Center of the ellipse is reported via * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events. 
* 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * | major | 12 * +----------------+----------------+----------------+----------------+ * | minor | 16 * +----------------+----------------+----------------+----------------+ * | reserved | 20 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * major - unt32_t, length of the major axis, pixels * minor - unt32_t, length of the minor axis, pixels * * Multi-touch orientation event - touch point's shape has changed * its orientation: calculated as a clockwise angle between the major axis * of the ellipse and positive Y axis in degrees, [-180; +180]. 
* 0 1 2 3 octet * +----------------+----------------+----------------+----------------+ * | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4 * +----------------+----------------+----------------+----------------+ * | reserved | 8 * +----------------+----------------+----------------+----------------+ * | orientation | reserved | 12 * +----------------+----------------+----------------+----------------+ * | reserved | 16 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ * | reserved | 40 * +----------------+----------------+----------------+----------------+ * * orientation - int16_t, clockwise angle of the major axis */ struct xenkbd_mtouch { uint8_t type; /* XENKBD_TYPE_MTOUCH */ uint8_t event_type; /* XENKBD_MT_EV_??? */ uint8_t contact_id; uint8_t reserved[5]; /* reserved for the future use */ union { struct { int32_t abs_x; /* absolute X position, pixels */ int32_t abs_y; /* absolute Y position, pixels */ } pos; struct { uint32_t major; /* length of the major axis, pixels */ uint32_t minor; /* length of the minor axis, pixels */ } shape; int16_t orientation; /* clockwise angle of the major axis */ } u; }; #define XENKBD_IN_EVENT_SIZE 40 union xenkbd_in_event { uint8_t type; struct xenkbd_motion motion; struct xenkbd_key key; struct xenkbd_position pos; struct xenkbd_mtouch mtouch; char pad[XENKBD_IN_EVENT_SIZE]; }; /* ***************************************************************************** * Frontend to backend events ***************************************************************************** * * Out events may be sent only when requested by backend, and receipt * of an unknown out event is an error. * No out events currently defined. 
* All event packets have the same length (40 octets) * All event packets have common header: * 0 octet * +-----------------+ * | type | * +-----------------+ * type - uint8_t, event code */ #define XENKBD_OUT_EVENT_SIZE 40 union xenkbd_out_event { uint8_t type; char pad[XENKBD_OUT_EVENT_SIZE]; }; /* ***************************************************************************** * Shared page ***************************************************************************** */ #define XENKBD_IN_RING_SIZE 2048 #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) #define XENKBD_IN_RING_OFFS 1024 #define XENKBD_IN_RING(page) \ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) #define XENKBD_IN_RING_REF(page, idx) \ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) #define XENKBD_OUT_RING_SIZE 1024 #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) #define XENKBD_OUT_RING(page) \ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) #define XENKBD_OUT_RING_REF(page, idx) \ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) struct xenkbd_page { uint32_t in_cons, in_prod; uint32_t out_cons, out_prod; }; #endif /* __XEN_PUBLIC_IO_KBDIF_H__ */ PK ! �㠨 � xen/interface/io/tpmif.hnu �[��� /****************************************************************************** * tpmif.h * * TPM I/O interface for Xen guest OSes, v2 * * This file is in the public domain. * */ #ifndef __XEN_PUBLIC_IO_TPMIF_H__ #define __XEN_PUBLIC_IO_TPMIF_H__ /* * Xenbus state machine * * Device open: * 1. Both ends start in XenbusStateInitialising * 2. Backend transitions to InitWait (frontend does not wait on this step) * 3. Frontend populates ring-ref, event-channel, feature-protocol-v2 * 4. Frontend transitions to Initialised * 5. Backend maps grant and event channel, verifies feature-protocol-v2 * 6. Backend transitions to Connected * 7. 
Frontend verifies feature-protocol-v2, transitions to Connected * * Device close: * 1. State is changed to XenbusStateClosing * 2. Frontend transitions to Closed * 3. Backend unmaps grant and event, changes state to InitWait */ enum vtpm_shared_page_state { VTPM_STATE_IDLE, /* no contents / vTPM idle / cancel complete */ VTPM_STATE_SUBMIT, /* request ready / vTPM working */ VTPM_STATE_FINISH, /* response ready / vTPM idle */ VTPM_STATE_CANCEL, /* cancel requested / vTPM working */ }; /* The backend should only change state to IDLE or FINISH, while the * frontend should only change to SUBMIT or CANCEL. */ struct vtpm_shared_page { uint32_t length; /* request/response length in bytes */ uint8_t state; /* enum vtpm_shared_page_state */ uint8_t locality; /* for the current request */ uint8_t pad; uint8_t nr_extra_pages; /* extra pages for long packets; may be zero */ uint32_t extra_pages[]; /* grant IDs; length in nr_extra_pages */ }; #endif PK ! ){��"