725 int ip_append_data(struct sock *sk, 726 int getfrag(void *from, char *to, int offset, int len, 727 int odd, struct sk_buff *skb), 728 void *from, int length, int transhdrlen, 729 struct ipcm_cookie *ipc, struct rtable *rt, 730 unsigned int flags)
731 {
732 struct inet_opt *inet = inet_sk(sk); 733 struct sk_buff *skb; 734 735 struct ip_options *opt = NULL; 736 int hh_len; 737 int exthdrlen; 738 int mtu; 739 int copy; 740 int err; 741 int offset = 0; 742 unsigned int maxfraglen, fragheaderlen; 743 int csummode = CHECKSUM_NONE; 744 745 if (flags&MSG_PROBE) 746 return 0; 747 748 if (skb_queue_empty(&sk->sk_write_queue)) { 749 /* 750 * setup for corking. 751 */ 752 opt = ipc->opt; 753 if (opt) { 754 if (inet->cork.opt == NULL) { 755 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); 756 if (unlikely(inet->cork.opt == NULL)) 757 return -ENOBUFS; 758 } 759 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt- >optlen); 760 inet->cork.flags |= IPCORK_OPT; 761 inet->cork.addr = ipc->addr; 762 } 763 dst_hold(&rt->u.dst); 764 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst); 765 inet->cork.rt = rt; 766 inet->cork.length = 0; 767 sk->sk_sndmsg_page = NULL; 768 sk->sk_sndmsg_off = 0; 769 if ((exthdrlen = rt->u.dst.header_len) != 0) { 770 length += exthdrlen; 771 transhdrlen += exthdrlen; 772 } 773 } else { 774 rt = inet->cork.rt; 775 if (inet->cork.flags & IPCORK_OPT) 776 opt = inet->cork.opt; 777 778 transhdrlen = 0; 779 exthdrlen = 0; 780 mtu = inet->cork.fragsize; 781 } 782 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 783 784 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 785 maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; 786 787 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 788 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu- exthdrlen); 789 return -EMSGSIZE; 790 } 791 792 /* 793 * transhdrlen > 0 means that this is the first fragment and we wish 794 * it won't be fragmented in the future. 795 */ 796 if (transhdrlen && 797 length + fragheaderlen <= maxfraglen && 798 rt->u.dst.dev->features& (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && 799 !exthdrlen) 800 csummode = CHECKSUM_HW; 801 802 inet->cork.length += length; 803 804 /* So, what's going on in the loop below? 805 * 806 * We use calculated fragment length to generate chained skb, 807 * each of segments is IP fragment ready for sending to network after 808 * adding appropriate IP header. 809 * 810 * Mistake is: 811 * 812 * If mtu-fragheaderlen is not 0 modulo 8, we generate additional 813 * small fragment of length (mtu-fragheaderlen)%8, even though 814 * it is not necessary. Not a big bug, but needs a fix. 815 */ 816 817 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 818 goto alloc_new_skb; 819 820 while (length > 0) { 821 if ((copy = maxfraglen - skb->len) <= 0) { 822 char *data; 823 unsigned int datalen; 824 unsigned int fraglen; 825 unsigned int alloclen; 826 BUG_TRAP(copy == 0); 827 828 alloc_new_skb: 829 datalen = maxfraglen - fragheaderlen; 830 if (datalen > length) 831 datalen = length; 832 833 fraglen = datalen + fragheaderlen; 834 if ((flags & MSG_MORE) && 835 !(rt->u.dst.dev->features&NETIF_F_SG)) 836 alloclen = maxfraglen; 837 else 838 alloclen = datalen + fragheaderlen; 839 840 /* The last fragment gets additional space at tail. 841 * Note, with MSG_MORE we overallocate on fragments, 842 * because we have no idea what fragment will be 843 * the last. 844 */ 845 if (datalen == length) 846 alloclen += rt->u.dst.trailer_len; 847 848 if (transhdrlen) { 849 skb = sock_alloc_send_skb(sk, 850 alloclen + hh_len + 15, 851 (flags & MSG_DONTWAIT), &err); 852 } else { 853 skb = NULL; 854 if (atomic_read(&sk->sk_wmem_alloc) <= 855 2 * sk->sk_sndbuf) 856 skb = sock_wmalloc(sk, 857 alloclen + hh_len + 15, 1, 858 sk->sk_allocation); 859 if (unlikely(skb == NULL)) 860 err = -ENOBUFS; 861 } 862 if (skb == NULL) 863 goto error; 864 865 /* 866 * Fill in the control structures 867 */ 868 skb->ip_summed = csummode; 869 skb->csum = 0; 870 skb_reserve(skb, hh_len); 871 872 /* 873 * Find where to start putting bytes. 874 */ 875 data = skb_put(skb, fraglen); 876 skb->nh.raw = data + exthdrlen; 877 data += fragheaderlen; 878 skb->h.raw = data + exthdrlen; 879 880 copy = datalen - transhdrlen; 881 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) { 882 err = -EFAULT; 883 kfree_skb(skb); 884 goto error; 885 } 886 887 offset += copy; 888 length -= datalen; 889 transhdrlen = 0; 890 exthdrlen = 0; 891 csummode = CHECKSUM_NONE; 892 893 /* 894 * Put the packet on the pending queue. 895 */ 896 __skb_queue_tail(&sk->sk_write_queue, skb); 897 continue; 898 } 899 900 if (copy > length) 901 copy = length; 902 903 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 904 unsigned int off; 905 906 off = skb->len; 907 if (getfrag(from, skb_put(skb, copy), 908 offset, copy, off, skb) < 0) { 909 __skb_trim(skb, off); 910 err = -EFAULT; 911 goto error; 912 } 913 } else { 914 int i = skb_shinfo(skb)->nr_frags; 915 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 916 struct page *page = sk->sk_sndmsg_page; 917 int off = sk->sk_sndmsg_off; 918 unsigned int left; 919 920 if (page && (left = PAGE_SIZE - off) > 0) { 921 if (copy >= left) 922 copy = left; 923 if (page != frag->page) { 924 if (i == MAX_SKB_FRAGS) { 925 err = -EMSGSIZE; 926 goto error; 927 } 928 get_page(page); 929 skb_fill_page_desc(skb, i, page, sk- >sk_sndmsg_off, 0); 930 frag = &skb_shinfo(skb)->frags[i]; 931 } 932 } else if (i < MAX_SKB_FRAGS) { 933 if (copy > PAGE_SIZE) 934 copy = PAGE_SIZE; 935 page = alloc_pages(sk->sk_allocation, 0); 936 if (page == NULL) { 937 err = -ENOMEM; 938 goto error; 939 } 940 sk->sk_sndmsg_page = page; 941 sk->sk_sndmsg_off = 0; 942 943 skb_fill_page_desc(skb, i, page, 0, 0); 944 frag = &skb_shinfo(skb)->frags[i]; 945 skb->truesize += PAGE_SIZE; 946 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); 947 } else { 948 err = -EMSGSIZE; 949 goto error; 950 } 951 if (getfrag(from, page_address(frag->page)+frag- >page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 952 err = -EFAULT; 953 goto error; 954 } 955 sk->sk_sndmsg_off += copy; 956 frag->size += copy; 957 skb->len += copy; 958 skb->data_len += copy; 959 } 960 offset += copy; 961 length -= copy; 962 } 963 964 return 0; 965 966 error: 967 inet->cork.length -= length; 968 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 969 return err; 970 } 971