Commit Graph

42364 Commits

Author SHA1 Message Date
Michal Nazarewicz c185b07fc9 include/linux/kernel.h: deduplicate code implementing clamp* macros
Instead of open-coding clamp_t macro min_t and max_t the way clamp macro
does and instead of open-coding clamp_val simply use clamp_t.
Furthermore, normalise argument naming in the macros to be lo and hi.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Cc: Mark Rustad <mark.d.rustad@intel.com>
Cc: "Kirsher, Jeffrey T" <jeffrey.t.kirsher@intel.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:03 -04:00
Michal Nazarewicz 2e1d06e1c0 include/linux/kernel.h: rewrite min3, max3 and clamp using min and max
It appears that gcc is better at optimising a double call to min and max
rather than open coded min3 and max3.  This can be observed here:

    $ cat min-max.c
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define min3(x, y, z) ({			\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	typeof(z) _min3 = (z);			\
    	(void) (&_min1 == &_min2);		\
    	(void) (&_min1 == &_min3);		\
    	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
    		(_min2 < _min3 ? _min2 : _min3); })

    int fmin3(int x, int y, int z) { return min3(x, y, z); }
    int fmin2(int x, int y, int z) { return min(min(x, y), z); }

    $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s
    	.file	"min-max.c"
    	.text
    	.p2align 4,,15
    	.globl	fmin3
    	.type	fmin3, @function
    fmin3:
    .LFB0:
    	.cfi_startproc
    	cmpl	%esi, %edi
    	jl	.L5
    	cmpl	%esi, %edx
    	movl	%esi, %eax
    	cmovle	%edx, %eax
    	ret
    	.p2align 4,,10
    	.p2align 3
    .L5:
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fmin3, .-fmin3
    	.p2align 4,,15
    	.globl	fmin2
    	.type	fmin2, @function
    fmin2:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovle	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fmin2, .-fmin2
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

fmin3 function, which uses open-coded min3 macro, is compiled into total
of ten instructions including a conditional branch, whereas fmin2
function, which uses two calls to min2 macro, is compiled into six
instructions with no branches.

Similarly, open-coded clamp produces the same code as clamp using min and
max macros, but the latter is much shorter:

    $ cat clamp.c
    #define clamp(val, min, max) ({			\
    	typeof(val) __val = (val);		\
    	typeof(min) __min = (min);		\
    	typeof(max) __max = (max);		\
    	(void) (&__val == &__min);		\
    	(void) (&__val == &__max);		\
    	__val = __val < __min ? __min: __val;	\
    	__val > __max ? __max: __val; })
    #define min(x, y) ({				\
    	typeof(x) _min1 = (x);			\
    	typeof(y) _min2 = (y);			\
    	(void) (&_min1 == &_min2);		\
    	_min1 < _min2 ? _min1 : _min2; })
    #define max(x, y) ({				\
    	typeof(x) _max1 = (x);			\
    	typeof(y) _max2 = (y);			\
    	(void) (&_max1 == &_max2);		\
    	_max1 > _max2 ? _max1 : _max2; })

    int fclamp(int v, int min, int max) { return clamp(v, min, max); }
    int fclampmm(int v, int min, int max) { return min(max(v, min), max); }

    $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s
    	.file	"clamp.c"
    	.text
    	.p2align 4,,15
    	.globl	fclamp
    	.type	fclamp, @function
    fclamp:
    .LFB0:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	movl	%edx, %eax
    	cmovge	%esi, %edi
    	cmpl	%edx, %edi
    	cmovle	%edi, %eax
    	ret
    	.cfi_endproc
    .LFE0:
    	.size	fclamp, .-fclamp
    	.p2align 4,,15
    	.globl	fclampmm
    	.type	fclampmm, @function
    fclampmm:
    .LFB1:
    	.cfi_startproc
    	cmpl	%edi, %esi
    	cmovge	%esi, %edi
    	cmpl	%edi, %edx
    	movl	%edi, %eax
    	cmovle	%edx, %eax
    	ret
    	.cfi_endproc
    .LFE1:
    	.size	fclampmm, .-fclampmm
    	.ident	"GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    	.section	.note.GNU-stack,"",@progbits

    Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
    gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
    Copyright (C) 2011 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before
    -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after

48 bytes reduction.  The do_fault_around was a few instruction shorter
and as far as I can tell saved 12 bytes on the stack, i.e.:

    $ grep -e rsp -e pop -e push do_fault_around.*
    do_fault_around.before.s:push   %rbp
    do_fault_around.before.s:mov    %rsp,%rbp
    do_fault_around.before.s:push   %r13
    do_fault_around.before.s:push   %r12
    do_fault_around.before.s:push   %rbx
    do_fault_around.before.s:sub    $0x38,%rsp
    do_fault_around.before.s:add    $0x38,%rsp
    do_fault_around.before.s:pop    %rbx
    do_fault_around.before.s:pop    %r12
    do_fault_around.before.s:pop    %r13
    do_fault_around.before.s:pop    %rbp

    do_fault_around.after.s:push   %rbp
    do_fault_around.after.s:mov    %rsp,%rbp
    do_fault_around.after.s:push   %r12
    do_fault_around.after.s:push   %rbx
    do_fault_around.after.s:sub    $0x30,%rsp
    do_fault_around.after.s:add    $0x30,%rsp
    do_fault_around.after.s:pop    %rbx
    do_fault_around.after.s:pop    %r12
    do_fault_around.after.s:pop    %rbp

or here side-by-side:

    Before                    After
    push   %rbp               push   %rbp
    mov    %rsp,%rbp          mov    %rsp,%rbp
    push   %r13
    push   %r12               push   %r12
    push   %rbx               push   %rbx
    sub    $0x38,%rsp         sub    $0x30,%rsp
    add    $0x38,%rsp         add    $0x30,%rsp
    pop    %rbx               pop    %rbx
    pop    %r12               pop    %r12
    pop    %r13
    pop    %rbp               pop    %rbp

There are also fewer branches:

    $ grep ^j do_fault_around.*
    do_fault_around.before.s:jae    ffffffff812079b7
    do_fault_around.before.s:jmp    ffffffff812079c5
    do_fault_around.before.s:jmp    ffffffff81207a14
    do_fault_around.before.s:ja     ffffffff812079f9
    do_fault_around.before.s:jb     ffffffff81207a10
    do_fault_around.before.s:jmp    ffffffff81207a63
    do_fault_around.before.s:jne    ffffffff812079df

    do_fault_around.after.s:jmp    ffffffff812079fd
    do_fault_around.after.s:ja     ffffffff812079e2
    do_fault_around.after.s:jb     ffffffff812079f9
    do_fault_around.after.s:jmp    ffffffff81207a4c
    do_fault_around.after.s:jne    ffffffff812079c8

And here's with allyesconfig on a different machine:

    $ uname -a; gcc --version; ls -l vmlinux.*
    Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux
    gcc (GCC) 4.8.3
    Copyright (C) 2013 Free Software Foundation, Inc.
    This is free software; see the source for copying conditions.  There is NO
    warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before
    -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after

530 bytes reduction.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: David Rientjes <rientjes@google.com>
Cc: "Rustad, Mark D" <mark.d.rustad@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:03 -04:00
Minchan Kim 722cdc1723 zsmalloc: change return value unit of zs_get_total_size_bytes
zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with
*byte unit* but zsmalloc operates *page unit* rather than byte unit so
let's change the API so benefit we could get is that reduce unnecessary
overhead (ie, change page unit with byte unit) in zsmalloc.

Since return type is pages, "zs_get_total_pages" is better than
"zs_get_total_size_bytes".

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Dan Streetman <ddstreet@ieee.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: <juno.choi@lge.com>
Cc: <seungho1.park@lge.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: David Horner <ds2horner@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:02 -04:00
Konstantin Khlebnikov 09316c09dd mm/balloon_compaction: add vmstat counters and kpageflags bit
Always mark pages with PageBalloon even if balloon compaction is disabled
and expose this mark in /proc/kpageflags as KPF_BALLOON.

Also this patch adds three counters into /proc/vmstat: "balloon_inflate",
"balloon_deflate" and "balloon_migrate".  They accumulate balloon
activity.  Current size of balloon is (balloon_inflate - balloon_deflate)
pages.

All generic balloon code now gathered under option CONFIG_MEMORY_BALLOON.
It should be selected by ballooning driver which wants use this feature.
Currently virtio-balloon is the only user.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:01 -04:00
Konstantin Khlebnikov 9d1ba80564 mm/balloon_compaction: remove balloon mapping and flag AS_BALLOON_MAP
Now ballooned pages are detected using PageBalloon().  Fake mapping is no
longer required.  This patch links ballooned pages to balloon device using
field page->private instead of page->mapping.  Also this patch embeds
balloon_dev_info directly into struct virtio_balloon.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:01 -04:00
Konstantin Khlebnikov d6d86c0a7f mm/balloon_compaction: redesign ballooned pages management
Sasha Levin reported KASAN splash inside isolate_migratepages_range().
Problem is in the function __is_movable_balloon_page() which tests
AS_BALLOON_MAP in page->mapping->flags.  This function has no protection
against anonymous pages.  As result it tried to check address space flags
inside struct anon_vma.

Further investigation shows more problems in current implementation:

* Special branch in __unmap_and_move() never works:
  balloon_page_movable() checks page flags and page_count.  In
  __unmap_and_move() page is locked, reference counter is elevated, thus
  balloon_page_movable() always fails.  As a result execution goes to the
  normal migration path.  virtballoon_migratepage() returns
  MIGRATEPAGE_BALLOON_SUCCESS instead of MIGRATEPAGE_SUCCESS,
  move_to_new_page() thinks this is an error code and assigns
  newpage->mapping to NULL.  Newly migrated page lose connectivity with
  balloon an all ability for further migration.

* lru_lock erroneously required in isolate_migratepages_range() for
  isolation ballooned page.  This function releases lru_lock periodically,
  this makes migration mostly impossible for some pages.

* balloon_page_dequeue have a tight race with balloon_page_isolate:
  balloon_page_isolate could be executed in parallel with dequeue between
  picking page from list and locking page_lock.  Race is rare because they
  use trylock_page() for locking.

This patch fixes all of them.

Instead of fake mapping with special flag this patch uses special state of
page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256.  Buddy allocator uses
PAGE_BUDDY_MAPCOUNT_VALUE = -128 for similar purpose.  Storing mark
directly in struct page makes everything safer and easier.

PagePrivate is used to mark pages present in page list (i.e.  not
isolated, like PageLRU for normal pages).  It replaces special rules for
reference counter and makes balloon migration similar to migration of
normal pages.  This flag is protected by page_lock together with link to
the balloon device.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Link: http://lkml.kernel.org/p/53E6CEAA.9020105@oracle.com
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: <stable@vger.kernel.org>	[3.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:01 -04:00
Johannes Weiner b70a2a21dc mm: memcontrol: fix transparent huge page allocations under pressure
In a memcg with even just moderate cache pressure, success rates for
transparent huge page allocations drop to zero, wasting a lot of effort
that the allocator puts into assembling these pages.

The reason for this is that the memcg reclaim code was never designed for
higher-order charges.  It reclaims in small batches until there is room
for at least one page.  Huge page charges only succeed when these batches
add up over a series of huge faults, which is unlikely under any
significant load involving order-0 allocations in the group.

Remove that loop on the memcg side in favor of passing the actual reclaim
goal to direct reclaim, which is already set up and optimized to meet
higher-order goals efficiently.

This brings memcg's THP policy in line with the system policy: if the
allocator painstakingly assembles a hugepage, memcg will at least make an
honest effort to charge it.  As a result, transparent hugepage allocation
rates amid cache activity are drastically improved:

                                      vanilla                 patched
pgalloc                 4717530.80 (  +0.00%)   4451376.40 (  -5.64%)
pgfault                  491370.60 (  +0.00%)    225477.40 ( -54.11%)
pgmajfault                    2.00 (  +0.00%)         1.80 (  -6.67%)
thp_fault_alloc               0.00 (  +0.00%)       531.60 (+100.00%)
thp_fault_fallback          749.00 (  +0.00%)       217.40 ( -70.88%)

[ Note: this may in turn increase memory consumption from internal
  fragmentation, which is an inherent risk of transparent hugepages.
  Some setups may have to adjust the memcg limits accordingly to
  accomodate this - or, if the machine is already packed to capacity,
  disable the transparent huge page feature. ]

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Dave Hansen <dave@sr71.net>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:59 -04:00
Vladimir Davydov 6f817f4cda memcg: move memcg_update_cache_size() to slab_common.c
`While growing per memcg caches arrays, we jump between memcontrol.c and
slab_common.c in a weird way:

  memcg_alloc_cache_id - memcontrol.c
    memcg_update_all_caches - slab_common.c
      memcg_update_cache_size - memcontrol.c

There's absolutely no reason why memcg_update_cache_size can't live on the
slab's side though.  So let's move it there and settle it comfortably amid
per-memcg cache allocation functions.

Besides, this patch cleans this function up a bit, removing all the
useless comments from it, and renames it to memcg_update_cache_params to
conform to memcg_alloc/free_cache_params, which we already have in
slab_common.c.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:59 -04:00
Vladimir Davydov 33a690c45b memcg: move memcg_{alloc,free}_cache_params to slab_common.c
The only reason why they live in memcontrol.c is that we get/put css
reference to the owner memory cgroup in them.  However, we can do that in
memcg_{un,}register_cache.  OTOH, there are several reasons to move them
to slab_common.c.

First, I think that the less public interface functions we have in
memcontrol.h the better.  Since the functions I move don't depend on
memcontrol, I think it's worth making them private to slab, especially
taking into account that the arrays are defined on the slab's side too.

Second, the way how per-memcg arrays are updated looks rather awkward: it
proceeds from memcontrol.c (__memcg_activate_kmem) to slab_common.c
(memcg_update_all_caches) and back to memcontrol.c again
(memcg_update_array_size).  In the following patches I move the function
relocating the arrays (memcg_update_array_size) to slab_common.c and
therefore get rid this circular call path.  I think we should have the
cache allocation stuff in the same place where we have relocation, because
it's easier to follow the code then.  So I move arrays alloc/free
functions to slab_common.c too.

The third point isn't obvious.  I'm going to make the list_lru structure
per-memcg to allow targeted kmem reclaim.  That means we will have
per-memcg arrays in list_lrus too.  It turns out that it's much easier to
update these arrays in list_lru.c rather than in memcontrol.c, because all
the stuff we need is defined there.  This patch makes memcg caches arrays
allocation path conform that of the upcoming list_lru.

So let's move these functions to slab_common.c and make them static.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:59 -04:00
Sasha Levin 31c9afa6db mm: introduce VM_BUG_ON_MM
Very similar to VM_BUG_ON_PAGE and VM_BUG_ON_VMA, dump struct_mm when the
bug is hit.

[akpm@linux-foundation.org: coding-style fixes]
[mhocko@suse.cz: fix build]
[mhocko@suse.cz: fix build some more]
[akpm@linux-foundation.org: do strange things to avoid doing strange things for the comma separators]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:58 -04:00
Junxiao Bi 934f3072c1 mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set
commit 21caf2fc19 ("mm: teach mm by current context info to not do I/O
during memory allocation") introduces PF_MEMALLOC_NOIO flag to avoid doing
I/O inside memory allocation, __GFP_IO is cleared when this flag is set,
but __GFP_FS implies __GFP_IO, it should also be cleared.  Or it may still
run into I/O, like in superblock shrinker.  And this will make the kernel
run into the deadlock case described in that commit.

See Dave Chinner's comment about io in superblock shrinker:

Filesystem shrinkers do indeed perform IO from the superblock shrinker and
have for years.  Even clean inodes can require IO before they can be freed
- e.g.  on an orphan list, need truncation of post-eof blocks, need to
wait for ordered operations to complete before it can be freed, etc.

IOWs, Ext4, btrfs and XFS all can issue and/or block on arbitrary amounts
of IO in the superblock shrinker context.  XFS, in particular, has been
doing transactions and IO from the VFS inode cache shrinker since it was
first introduced....

Fix this by clearing __GFP_FS in memalloc_noio_flags(), this function has
masked all the gfp_mask that will be passed into fs for the processes
setting PF_MEMALLOC_NOIO in the direct reclaim path.

v1 thread at: https://lkml.org/lkml/2014/9/3/32

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: joyce.xue <xuejiufei@huawei.com>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:58 -04:00
Johannes Weiner 5705465174 mm: clean up zone flags
Page reclaim tests zone_is_reclaim_dirty(), but the site that actually
sets this state does zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY), sending the
reader through layers indirection just to track down a simple bit.

Remove all zone flag wrappers and just use bitops against zone->flags
directly.  It's just as readable and the lines are barely any longer.

Also rename ZONE_TAIL_LRU_DIRTY to ZONE_DIRTY to match ZONE_WRITEBACK, and
remove the zone_flags_t typedef.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:57 -04:00
Sasha Levin 81d1b09c6b mm: convert a few VM_BUG_ON callers to VM_BUG_ON_VMA
Trivially convert a few VM_BUG_ON calls to VM_BUG_ON_VMA to extract
more information when they trigger.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:57 -04:00
Sasha Levin fa3759ccd5 mm: introduce VM_BUG_ON_VMA
Very similar to VM_BUG_ON_PAGE but dumps VMA information instead.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:57 -04:00
Sasha Levin 0bf5513978 mm: introduce dump_vma
Introduce a helper to dump information about a VMA, this also makes
dump_page_flags more generic and re-uses that so the output looks very
similar to dump_page:

[   61.903437] vma ffff88070f88be00 start 00007fff25970000 end 00007fff25992000
[   61.903437] next ffff88070facd600 prev ffff88070face400 mm ffff88070fade000
[   61.903437] prot 8000000000000025 anon_vma ffff88070fa1e200 vm_ops           (null)
[   61.903437] pgoff 7ffffffdd file           (null) private_data           (null)
[   61.909129] flags: 0x100173(read|write|mayread|maywrite|mayexec|growsdown|account)

[akpm@linux-foundation.org: make dump_vma() require CONFIG_DEBUG_VM]
[swarren@nvidia.com: fix dump_vma() compilation]
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michel Lespinasse <walken@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:57 -04:00
Andrew Morton 1c93923cc2 include/linux/migrate.h: remove migrate_page #define
This is designed to avoid a few ifdefs in .c files but it's obnoxious
because it can cause unsuspecting "migrate_page" symbols to get turned into
"NULL".

Just nuke it and use the ifdefs.

Cc: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:56 -04:00
Oleg Nesterov dd6eecb917 mempolicy: unexport get_vma_policy() and remove its "task" arg
- get_vma_policy(task) is not safe if task != current, remove this
  argument.

- get_vma_policy() no longer has callers outside of mempolicy.c,
  make it static.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:56 -04:00
Oleg Nesterov 74d2c3a05c mempolicy: introduce __get_vma_policy(), export get_task_policy()
Extract the code which looks for vma's policy from get_vma_policy()
into the new helper, __get_vma_policy(). Export get_task_policy().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:56 -04:00
Oleg Nesterov 6b6482bbf6 mempolicy: remove the "task" arg of vma_policy_mof() and simplify it
1. vma_policy_mof(task) is simply not safe unless task == current,
   it can race with do_exit()->mpol_put(). Remove this arg and update
   its single caller.

2. vma can not be NULL, remove this check and simplify the code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:56 -04:00
Johannes Weiner 1f13ae399c mm: remove noisy remainder of the scan_unevictable interface
The deprecation warnings for the scan_unevictable interface triggers by
scripts doing `sysctl -a | grep something else'.  This is annoying and not
helpful.

The interface has been defunct since 264e56d824 ("mm: disable user
interface to manually rescue unevictable pages"), which was in 2011, and
there haven't been any reports of usecases for it, only reports that the
deprecation warnings are annying.  It's unlikely that anybody is using
this interface specifically at this point, so remove it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:55 -04:00
Cyrill Gorcunov 9c5990240e mm: introduce check_data_rlimit helper
To eliminate code duplication lets introduce check_data_rlimit helper
which we will use in brk() and prctl() syscalls.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Julien Tinnes <jln@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:55 -04:00
David Rientjes 43e7a34d26 mm: rename allocflags_to_migratetype for clarity
The page allocator has gfp flags (like __GFP_WAIT) and alloc flags (like
ALLOC_CPUSET) that have separate semantics.

The function allocflags_to_migratetype() actually takes gfp flags, not
alloc flags, and returns a migratetype.  Rename it to
gfpflags_to_migratetype().

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:55 -04:00
Vlastimil Babka 1f9efdef4f mm, compaction: khugepaged should not give up due to need_resched()
Async compaction aborts when it detects zone lock contention or
need_resched() is true.  David Rientjes has reported that in practice,
most direct async compactions for THP allocation abort due to
need_resched().  This means that a second direct compaction is never
attempted, which might be OK for a page fault, but khugepaged is intended
to attempt a sync compaction in such case and in these cases it won't.

This patch replaces "bool contended" in compact_control with an int that
distinguishes between aborting due to need_resched() and aborting due to
lock contention.  This allows propagating the abort through all compaction
functions as before, but passing the abort reason up to
__alloc_pages_slowpath() which decides when to continue with direct
reclaim and another compaction attempt.

Another problem is that try_to_compact_pages() did not act upon the
reported contention (both need_resched() or lock contention) immediately
and would proceed with another zone from the zonelist.  When
need_resched() is true, that means initializing another zone compaction,
only to check again need_resched() in isolate_migratepages() and aborting.
 For zone lock contention, the unintended consequence is that the lock
contended status reported back to the allocator is detrmined from the last
zone where compaction was attempted, which is rather arbitrary.

This patch fixes the problem in the following way:
- async compaction of a zone aborting due to need_resched() or fatal signal
  pending means that further zones should not be tried. We report
  COMPACT_CONTENDED_SCHED to the allocator.
- aborting zone compaction due to lock contention means we can still try
  another zone, since it has different set of locks. We report back
  COMPACT_CONTENDED_LOCK only if *all* zones where compaction was attempted,
  it was aborted due to lock contention.

As a result of these fixes, khugepaged will proceed with second sync
compaction as intended, when the preceding async compaction aborted due to
need_resched().  Page fault compactions aborting due to need_resched()
will spare some cycles previously wasted by initializing another zone
compaction only to abort again.  Lock contention will be reported only
when compaction in all zones aborted due to lock contention, and therefore
it's not a good idea to try again after reclaim.

In stress-highalloc from mmtests configured to use __GFP_NO_KSWAPD, this
has improved number of THP collapse allocations by 10%, which shows
positive effect on khugepaged.  The benchmark's success rates are
unchanged as it is not recognized as khugepaged.  Numbers of compact_stall
and compact_fail events have however decreased by 20%, with
compact_success still a bit improved, which is good.  With benchmark
configured not to use __GFP_NO_KSWAPD, there is 6% improvement in THP
collapse allocations, and only slight improvement in stalls and failures.

[akpm@linux-foundation.org: fix warnings]
Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:54 -04:00
Vlastimil Babka 53853e2d2b mm, compaction: defer each zone individually instead of preferred zone
When direct sync compaction is often unsuccessful, it may become deferred
for some time to avoid further useless attempts, both sync and async.
Successful high-order allocations un-defer compaction, while further
unsuccessful compaction attempts prolong the compaction deferred period.

Currently the checking and setting deferred status is performed only on
the preferred zone of the allocation that invoked direct compaction.  But
compaction itself is attempted on all eligible zones in the zonelist, so
the behavior is suboptimal and may lead both to scenarios where 1)
compaction is attempted uselessly, or 2) where it's not attempted despite
good chances of succeeding, as shown on the examples below:

1) A direct compaction with Normal preferred zone failed and set
   deferred compaction for the Normal zone.  Another unrelated direct
   compaction with DMA32 as preferred zone will attempt to compact DMA32
   zone even though the first compaction attempt also included DMA32 zone.

   In another scenario, compaction with Normal preferred zone failed to
   compact Normal zone, but succeeded in the DMA32 zone, so it will not
   defer compaction.  In the next attempt, it will try Normal zone which
   will fail again, instead of skipping Normal zone and trying DMA32
   directly.

2) Kswapd will balance DMA32 zone and reset defer status based on
   watermarks looking good.  A direct compaction with preferred Normal
   zone will skip compaction of all zones including DMA32 because Normal
   was still deferred.  The allocation might have succeeded in DMA32, but
   won't.

This patch makes compaction deferring work on individual zone basis
instead of preferred zone.  For each zone, it checks compaction_deferred()
to decide if the zone should be skipped.  If watermarks fail after
compacting the zone, defer_compaction() is called.  The zone where
watermarks passed can still be deferred when the allocation attempt is
unsuccessful.  When allocation is successful, compaction_defer_reset() is
called for the zone containing the allocated page.  This approach should
approximate calling defer_compaction() only on zones where compaction was
attempted and did not yield allocated page.  There might be corner cases
but that is inevitable as long as the decision to stop compacting dues not
guarantee that a page will be allocated.

Due to a new COMPACT_DEFERRED return value, some functions relying
implicitly on COMPACT_SKIPPED = 0 had to be updated, with comments made
more accurate.  The did_some_progress output parameter of
__alloc_pages_direct_compact() is removed completely, as the caller
actually does not use it after compaction sets it - it is only considered
when direct reclaim sets it.

During testing on a two-node machine with a single very small Normal zone
on node 1, this patch has improved success rates in stress-highalloc
mmtests benchmark.  The success here were previously made worse by commit
3a025760fc ("mm: page_alloc: spill to remote nodes before waking
kswapd") as kswapd was no longer resetting often enough the deferred
compaction for the Normal zone, and DMA32 zones on both nodes were thus
not considered for compaction.  On different machine, success rates were
improved with __GFP_NO_KSWAPD allocations.

[akpm@linux-foundation.org: fix CONFIG_COMPACTION=n build]
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:53 -04:00
Laura Abbott 9efb3a421d lib/genalloc.c: add genpool range check function
After allocating an address from a particular genpool, there is no good
way to verify if that address actually belongs to a genpool.  Introduce
addr_in_gen_pool which will return if an address plus size falls
completely within the genpool range.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:52 -04:00
Laura Abbott 505e3be6c0 lib/genalloc.c: add power aligned algorithm
One of the more common algorithms used for allocation is to align the
start address of the allocation to the order of size requested.  Add this
as an algorithm option for genalloc.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Riley <davidriley@chromium.org>
Cc: Ritesh Harjain <ritesh.harjani@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:52 -04:00
Zhang Zhen ed2f240094 memory-hotplug: add sysfs valid_zones attribute
Currently memory-hotplug has two limits:

1. If the memory block is in ZONE_NORMAL, you can change it to
   ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE.

2. If the memory block is in ZONE_MOVABLE, you can change it to
   ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL.

With this patch, we can easy to know a memory block can be onlined to
which zone, and don't need to know the above two limits.

Updated the related Documentation.

[akpm@linux-foundation.org: use conventional comment layout]
[akpm@linux-foundation.org: fix build with CONFIG_MEMORY_HOTREMOVE=n]
[akpm@linux-foundation.org: remove unused local zone_prev]
Signed-off-by: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:52 -04:00
Joonsoo Kim bf0dea23a9 mm/slab: use percpu allocator for cpu cache
Because of chicken and egg problem, initialization of SLAB is really
complicated.  We need to allocate cpu cache through SLAB to make the
kmem_cache work, but before initialization of kmem_cache, allocation
through SLAB is impossible.

On the other hand, SLUB does initialization in a more simple way.  It uses
percpu allocator to allocate cpu cache so there is no chicken and egg
problem.

So, this patch try to use percpu allocator in SLAB.  This simplifies the
initialization step in SLAB so that we could maintain SLAB code more
easily.

In my testing there is no performance difference.

This implementation relies on percpu allocator.  Because percpu allocator
uses vmalloc address space, vmalloc address space could be exhausted by
this change on many cpu system with *32 bit* kernel.  This implementation
can cover 1024 cpus in worst case by following calculation.

Worst: 1024 cpus * 4 bytes for pointer * 300 kmem_caches *
	120 objects per cpu_cache = 140 MB
Normal: 1024 cpus * 4 bytes for pointer * 150 kmem_caches(slab merge) *
	80 objects per cpu_cache = 46 MB

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:51 -04:00
Joonsoo Kim ad2c814441 topology: add support for node_to_mem_node() to determine the fallback node
Anton noticed (http://www.spinics.net/lists/linux-mm/msg67489.html) that
on ppc LPARs with memoryless nodes, a large amount of memory was consumed
by slabs and was marked unreclaimable.  He tracked it down to slab
deactivations in the SLUB core when we allocate remotely, leading to poor
efficiency always when memoryless nodes are present.

After much discussion, Joonsoo provided a few patches that help
significantly.  They don't resolve the problem altogether:

 - memory hotplug still needs testing, that is when a memoryless node
   becomes memory-ful, we want to dtrt
 - there are other reasons for going off-node than memoryless nodes,
   e.g., fully exhausted local nodes

Neither case is resolved with this series, but I don't think that should
block their acceptance, as they can be explored/resolved with follow-on
patches.

The series consists of:

[1/3] topology: add support for node_to_mem_node() to determine the
      fallback node

[2/3] slub: fallback to node_to_mem_node() node if allocating on
      memoryless node

      - Joonsoo's patches to cache the nearest node with memory for each
        NUMA node

[3/3] Partial revert of 81c98869fa (""kthread: ensure locality of
      task_struct allocations")

 - At Tejun's request, keep the knowledge of memoryless node fallback
   to the allocator core.

This patch (of 3):

We need to determine the fallback node in slub allocator if the allocation
target node is memoryless node.  Without it, the SLUB wrongly select the
node which has no memory and can't use a partial slab, because of node
mismatch.  Introduced function, node_to_mem_node(X), will return a node Y
with memory that has the nearest distance.  If X is memoryless node, it
will return nearest distance node, but, if X is normal node, it will
return itself.

We will use this function in following patch to determine the fallback
node.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Han Pingtian <hanpt@linux.vnet.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Anton Blanchard <anton@samba.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:51 -04:00
Joonsoo Kim 61f47105a2 mm/sl[ao]b: always track caller in kmalloc_(node_)track_caller()
Now, we track caller if tracing or slab debugging is enabled.  If they are
disabled, we could save one argument passing overhead by calling
__kmalloc(_node)().  But, I think that it would be marginal.  Furthermore,
default slab allocator, SLUB, doesn't use this technique so I think that
it's okay to change this situation.

After this change, we can turn on/off CONFIG_DEBUG_SLAB without full
kernel build and remove some complicated '#if' defintion.  It looks more
benefitial to me.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:50 -04:00
Joonsoo Kim 07f361b2be mm/slab_common: move kmem_cache definition to internal header
We don't need to keep kmem_cache definition in include/linux/slab.h if we
don't need to inline kmem_cache_size().  According to my code inspection,
this function is only called at lc_create() in lib/lru_cache.c which may
be called at initialization phase of something, so we don't need to inline
it.  Therfore, move it to slab_common.c and move kmem_cache definition to
internal header.

After this change, we can change kmem_cache definition easily without full
kernel build.  For instance, we can turn on/off CONFIG_SLUB_STATS without
full kernel build.

[akpm@linux-foundation.org: export kmem_cache_size() to modules]
[rdunlap@infradead.org: add header files to fix kmemcheck.c build errors]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:50 -04:00
Oleg Nesterov 58cb65487e proc/maps: make vm_is_stack() logic namespace-friendly
- Rename vm_is_stack() to task_of_stack() and change it to return
  "struct task_struct *" rather than the global (and thus wrong in
  general) pid_t.

- Add the new pid_of_stack() helper which calls task_of_stack() and
  uses the right namespace to report the correct pid_t.

  Unfortunately we need to define this helper twice, in task_mmu.c
  and in task_nommu.c. perhaps it makes sense to add fs/proc/util.c
  and move at least pid_of_stack/task_of_stack there to avoid the
  code duplication.

- Change show_map_vma() and show_numa_map() to use the new helper.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:25:50 -04:00
Linus Torvalds b528392669 ACPI and power management updates for 3.18-rc1
- Rework the handling of wakeup IRQs by the IRQ core such that
    all of them will be switched over to "wakeup" mode in
    suspend_device_irqs() and in that mode the first interrupt
    will abort system suspend in progress or wake up the system
    if already in suspend-to-idle (or equivalent) without executing
    any interrupt handlers.  Among other things that eliminates the
    wakeup-related motivation to use the IRQF_NO_SUSPEND interrupt
    flag with interrupts which don't really need it and should not
    use it (Thomas Gleixner and Rafael J Wysocki).
 
  - Switch over ACPI to handling wakeup interrupts with the help
    of the new mechanism introduced by the above IRQ core rework
    (Rafael J Wysocki).
 
  - Rework the core generic PM domains code to eliminate code that's
    not used, add DT support and add a generic mechanism by which
    devices can be added to PM domains automatically during
    enumeration (Ulf Hansson, Geert Uytterhoeven and Tomasz Figa).
 
  - Add debugfs-based mechanics for debugging generic PM domains
    (Maciej Matraszek).
 
  - ACPICA update to upstream version 20140828.  Included are updates
    related to the SRAT and GTDT tables and the _PSx methods are in
    the METHOD_NAME list now (Bob Moore and Hanjun Guo).
 
  - Add _OSI("Darwin") support to the ACPI core (unfortunately, that
    can't really be done in a straightforward way) to prevent
    Thunderbolt from being turned off on Apple systems after boot
    (or after resume from system suspend) and rework the ACPI Smart
    Battery Subsystem (SBS) driver to work correctly with Apple
    platforms (Matthew Garrett and Andreas Noever).
 
  - ACPI LPSS (Low-Power Subsystem) driver update cleaning up the
    code, adding support for 133MHz I2C source clock on Intel Baytrail
    to it and making it avoid using UART RTS override with Auto Flow
    Control (Heikki Krogerus).
 
  - ACPI backlight updates removing the video_set_use_native_backlight
    quirk which is not necessary any more, making the code check the
    list of output devices returned by the _DOD method to avoid
    creating acpi_video interfaces that won't work and adding a quirk
    for Lenovo Ideapad Z570 (Hans de Goede, Aaron Lu and Stepan Bujnak).
 
  - New Win8 ACPI OSI quirks for some Dell laptops (Edward Lin).
 
  - Assorted ACPI code cleanups (Fabian Frederick, Rasmus Villemoes,
    Sudip Mukherjee, Yijing Wang, and Zhang Rui).
 
  - cpufreq core updates and cleanups (Viresh Kumar, Preeti U Murthy,
    Rasmus Villemoes).
 
  - cpufreq driver updates: cpufreq-cpu0/cpufreq-dt (driver name
    change among other things), ppc-corenet, powernv (Viresh Kumar,
    Preeti U Murthy, Shilpasri G Bhat, Lucas Stach).
 
  - cpuidle support for DT-based idle states infrastructure, new
    ARM64 cpuidle driver, cpuidle core cleanups (Lorenzo Pieralisi,
    Rasmus Villemoes).
 
  - ARM big.LITTLE cpuidle driver updates: support for DT-based
    initialization and Exynos5800 compatible string (Lorenzo Pieralisi,
    Kevin Hilman).
 
  - Rework of the test_suspend kernel command line argument and
    a new trace event for console resume (Srinivas Pandruvada,
    Todd E Brandt).
 
  - Second attempt to optimize swsusp_free() (hibernation core) to
    make it avoid going through all PFNs which may be way too slow on
    some systems (Joerg Roedel).
 
  - devfreq updates (Paul Bolle, Punit Agrawal, Ãrjan Eide).
 
  - rockchip-io Adaptive Voltage Scaling (AVS) driver and AVS
    entry update in MAINTAINERS (Heiko Stübner, Kevin Hilman).
 
  - PM core fix related to clock management (Geert Uytterhoeven).
 
  - PM core's sysfs code cleanup (Johannes Berg).
 
 /
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABCAAGBQJUNbJoAAoJEILEb/54YlRxRp8QAJyGIPdx+f03oBir+7vvEwhY
 svxd+V9xXK0UgWNGkCvlMk/1RIVy0qqtXliUrDaE+9tcHACA9+iAxMmNmDsjLOiO
 gpazuz5kgeznrmp1eNwQnYTt+OCReQIcyCsj4q4fNo9bbETTyr2bRz226LEuZekC
 TAiKdphYoOszFBgTVg5gfu+lqjHyXjgXPnwMTlRYn1y4YL2adDIgxj9cFedykTTW
 Eu593TY2dH6ovERJ6q3qxZbRuWuxtww95J07b3t2/2Eb3e/R/zlX0/XJ/C88f/m2
 DkqngbOYqCdw+zJeN6k8631foyfUwAcTd0sJ1+5nsm5H4NE5NqObjbxOk5/yNht6
 HgvgISGHWLerEw+A/Dk6o0oZOtR1G/TAQ5qQk5nUfKT/sSoU+9/USsXtWhXwZCia
 XccnJgW6ZtPrJJP3zDnkrxe3gndmLic11QXArw2IhWTsq0sZlAyMgtauBXLdDiQa
 H/AMiYrUNmIABef1cirBLTtgXN4Zbsai9vIrxMmV7OgBrclrh52NTjzr05P5Hnl2
 fRK56mb6mP59LymI7n8fyXL8tHnbNwFvTaxuvrZmzcYbzL0l9DuPocJrrTHRSfhm
 GFfzfvLj0R66ZM4PthRSwz4H2v1FnlRcCkj5k/QjtBPlyzxtOnJveqve5umbrnb9
 T5mRmlAs4iYwLuKCVVNT
 =sIv/
 -----END PGP SIGNATURE-----

Merge tag 'pm+acpi-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull ACPI and power management updates from Rafael Wysocki:
 "Features-wise, to me the most important this time is a rework of
  wakeup interrupts handling in the core that makes them work
  consistently across all of the available sleep states, including
  suspend-to-idle.  Many thanks to Thomas Gleixner for his help with
  this work.

  Second is an update of the generic PM domains code that has been in
  need of some care for quite a while.  Unused code is being removed, DT
  support is being added and domains are now going to be attached to
  devices in bus type code in analogy with the ACPI PM domain.  The
  majority of work here was done by Ulf Hansson who also has been the
  most active developer this time.

  Apart from this we have a traditional ACPICA update, this time to
  upstream version 20140828 and a few ACPI wakeup interrupts handling
  patches on top of the general rework mentioned above.  There also are
  several cpufreq commits including renaming the cpufreq-cpu0 driver to
  cpufreq-dt, as this is what implements generic DT-based cpufreq
  support, and a new DT-based idle states infrastructure for cpuidle.

  In addition to that, the ACPI LPSS driver is updated, ACPI support for
  Apple machines is improved, a few bugs are fixed and a few cleanups
  are made all over.

  Finally, the Adaptive Voltage Scaling (AVS) subsystem now has a tree
  maintained by Kevin Hilman that will be merged through the PM tree.

  Numbers-wise, the generic PM domains update takes the lead this time
  with 32 non-merge commits, second is cpufreq (15 commits) and the 3rd
  place goes to the wakeup interrupts handling rework (13 commits).

  Specifics:

   - Rework the handling of wakeup IRQs by the IRQ core such that all of
     them will be switched over to "wakeup" mode in suspend_device_irqs()
     and in that mode the first interrupt will abort system suspend in
     progress or wake up the system if already in suspend-to-idle (or
     equivalent) without executing any interrupt handlers.  Among other
     things that eliminates the wakeup-related motivation to use the
     IRQF_NO_SUSPEND interrupt flag with interrupts which don't really
     need it and should not use it (Thomas Gleixner and Rafael Wysocki)

   - Switch over ACPI to handling wakeup interrupts with the help of the
     new mechanism introduced by the above IRQ core rework (Rafael Wysocki)

   - Rework the core generic PM domains code to eliminate code that's
     not used, add DT support and add a generic mechanism by which
     devices can be added to PM domains automatically during enumeration
     (Ulf Hansson, Geert Uytterhoeven and Tomasz Figa).

   - Add debugfs-based mechanics for debugging generic PM domains
     (Maciej Matraszek).

   - ACPICA update to upstream version 20140828.  Included are updates
     related to the SRAT and GTDT tables and the _PSx methods are in the
     METHOD_NAME list now (Bob Moore and Hanjun Guo).

   - Add _OSI("Darwin") support to the ACPI core (unfortunately, that
     can't really be done in a straightforward way) to prevent
     Thunderbolt from being turned off on Apple systems after boot (or
     after resume from system suspend) and rework the ACPI Smart Battery
     Subsystem (SBS) driver to work correctly with Apple platforms
     (Matthew Garrett and Andreas Noever).

   - ACPI LPSS (Low-Power Subsystem) driver update cleaning up the code,
     adding support for 133MHz I2C source clock on Intel Baytrail to it
     and making it avoid using UART RTS override with Auto Flow Control
     (Heikki Krogerus).

   - ACPI backlight updates removing the video_set_use_native_backlight
     quirk which is not necessary any more, making the code check the
     list of output devices returned by the _DOD method to avoid
     creating acpi_video interfaces that won't work and adding a quirk
     for Lenovo Ideapad Z570 (Hans de Goede, Aaron Lu and Stepan Bujnak)

   - New Win8 ACPI OSI quirks for some Dell laptops (Edward Lin)

   - Assorted ACPI code cleanups (Fabian Frederick, Rasmus Villemoes,
     Sudip Mukherjee, Yijing Wang, and Zhang Rui)

   - cpufreq core updates and cleanups (Viresh Kumar, Preeti U Murthy,
     Rasmus Villemoes)

   - cpufreq driver updates: cpufreq-cpu0/cpufreq-dt (driver name change
     among other things), ppc-corenet, powernv (Viresh Kumar, Preeti U
     Murthy, Shilpasri G Bhat, Lucas Stach)

   - cpuidle support for DT-based idle states infrastructure, new ARM64
     cpuidle driver, cpuidle core cleanups (Lorenzo Pieralisi, Rasmus
     Villemoes)

   - ARM big.LITTLE cpuidle driver updates: support for DT-based
     initialization and Exynos5800 compatible string (Lorenzo Pieralisi,
     Kevin Hilman)

   - Rework of the test_suspend kernel command line argument and a new
     trace event for console resume (Srinivas Pandruvada, Todd E Brandt)

   - Second attempt to optimize swsusp_free() (hibernation core) to make
     it avoid going through all PFNs which may be way too slow on some
     systems (Joerg Roedel)

   - devfreq updates (Paul Bolle, Punit Agrawal, Ãrjan Eide).

   - rockchip-io Adaptive Voltage Scaling (AVS) driver and AVS entry
     update in MAINTAINERS (Heiko Stübner, Kevin Hilman)

   - PM core fix related to clock management (Geert Uytterhoeven)

   - PM core's sysfs code cleanup (Johannes Berg)"

* tag 'pm+acpi-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (105 commits)
  ACPI / fan: printk replacement
  PM / clk: Fix crash in clocks management code if !CONFIG_PM_RUNTIME
  PM / Domains: Rename cpu_data to cpuidle_data
  cpufreq: cpufreq-dt: fix potential double put of cpu OF node
  cpufreq: cpu0: rename driver and internals to 'cpufreq_dt'
  PM / hibernate: Iterate over set bits instead of PFNs in swsusp_free()
  cpufreq: ppc-corenet: remove duplicate update of cpu_data
  ACPI / sleep: Rework the handling of ACPI GPE wakeup from suspend-to-idle
  PM / sleep: Rename platform suspend/resume functions in suspend.c
  PM / sleep: Export dpm_suspend_late/noirq() and dpm_resume_early/noirq()
  ACPICA: Introduce acpi_enable_all_wakeup_gpes()
  ACPICA: Clear all non-wakeup GPEs in acpi_hw_enable_wakeup_gpe_block()
  ACPI / video: check _DOD list when creating backlight devices
  PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h
  cpufreq: Replace strnicmp with strncasecmp
  cpufreq: powernv: Set the cpus to nominal frequency during reboot/kexec
  cpufreq: powernv: Set the pstate of the last hotplugged out cpu in policy->cpus to minimum
  cpufreq: Allow stop CPU callback to be used by all cpufreq drivers
  PM / devfreq: exynos: Enable building exynos PPMU as module
  PM / devfreq: Export helper functions for drivers
  ...
2014-10-09 16:07:43 -04:00
Linus Torvalds 80213c03c4 PCI changes for the v3.18 merge window:
Enumeration
     - Check Vendor ID only for Config Request Retry Status (Rajat Jain)
     - Enable Config Request Retry Status when supported (Rajat Jain)
     - Add generic domain handling (Catalin Marinas)
     - Generate uppercase hex for modalias interface class (Ricardo Ribalda Delgado)
 
   Resource management
     - Add missing MEM_64 mask in pci_assign_unassigned_bridge_resources() (Yinghai Lu)
     - Increase IBM ipr SAS Crocodile BARs to at least system page size (Douglas Lehr)
 
   PCI device hotplug
     - Prevent NULL dereference during pciehp probe (Andreas Noever)
     - Move _HPP & _HPX handling into core (Bjorn Helgaas)
     - Apply _HPP to PCIe devices as well as PCI (Bjorn Helgaas)
     - Apply _HPP/_HPX to display devices (Bjorn Helgaas)
     - Preserve SERR & PARITY settings when applying _HPP/_HPX (Bjorn Helgaas)
     - Preserve MPS and MRRS settings when applying _HPP/_HPX (Bjorn Helgaas)
     - Apply _HPP/_HPX to all devices, not just hot-added ones (Bjorn Helgaas)
     - Fix wait time in pciehp timeout message (Yinghai Lu)
     - Add more pciehp Slot Control debug output (Yinghai Lu)
     - Stop disabling pciehp notifications during init (Yinghai Lu)
 
   MSI
     - Remove arch_msi_check_device() (Alexander Gordeev)
     - Rename pci_msi_check_device() to pci_msi_supported() (Alexander Gordeev)
     - Move D0 check into pci_msi_check_device() (Alexander Gordeev)
     - Remove unused kobject from struct msi_desc (Yijing Wang)
     - Remove "pos" from the struct msi_desc msi_attrib (Yijing Wang)
     - Add "msi_bus" sysfs MSI/MSI-X control for endpoints (Yijing Wang)
     - Use __get_cached_msi_msg() instead of get_cached_msi_msg() (Yijing Wang)
     - Use __read_msi_msg() instead of read_msi_msg() (Yijing Wang)
     - Use __write_msi_msg() instead of write_msi_msg() (Yijing Wang)
 
   Power management
     - Drop unused runtime PM support code for PCIe ports (Rafael J.  Wysocki)
     - Allow PCI devices to be put into D3cold during system suspend (Rafael J. Wysocki)
 
   AER
     - Add additional AER error strings (Gong Chen)
     - Make <linux/aer.h> standalone includable (Thierry Reding)
 
   Virtualization
     - Add ACS quirk for Solarflare SFC9120 & SFC9140 (Alex Williamson)
     - Add ACS quirk for Intel 10G NICs (Alex Williamson)
     - Add ACS quirk for AMD A88X southbridge (Marti Raudsepp)
     - Remove unused pci_find_upstream_pcie_bridge(), pci_get_dma_source() (Alex Williamson)
     - Add device flag helpers (Ethan Zhao)
     - Assume all Mellanox devices have broken INTx masking (Gavin Shan)
 
   Generic host bridge driver
     - Fix ioport_map() for !CONFIG_GENERIC_IOMAP (Liviu Dudau)
     - Add pci_register_io_range() and pci_pio_to_address() (Liviu Dudau)
     - Define PCI_IOBASE as the base of virtual PCI IO space (Liviu Dudau)
     - Fix the conversion of IO ranges into IO resources (Liviu Dudau)
     - Add pci_get_new_domain_nr() and of_get_pci_domain_nr() (Liviu Dudau)
     - Add support for parsing PCI host bridge resources from DT (Liviu Dudau)
     - Add pci_remap_iospace() to map bus I/O resources (Liviu Dudau)
     - Add arm64 architectural support for PCI (Liviu Dudau)
 
   APM X-Gene
     - Add APM X-Gene PCIe driver (Tanmay Inamdar)
     - Add arm64 DT APM X-Gene PCIe device tree nodes (Tanmay Inamdar)
 
   Freescale i.MX6
     - Probe in module_init(), not fs_initcall() (Lucas Stach)
     - Delay enabling reference clock for SS until it stabilizes (Tim Harvey)
 
   Marvell MVEBU
     - Fix uninitialized variable in mvebu_get_tgt_attr() (Thomas Petazzoni)
 
   NVIDIA Tegra
     - Make sure the PCIe PLL is really reset (Eric Yuen)
     - Add error path tegra_msi_teardown_irq() cleanup (Jisheng Zhang)
     - Fix extended configuration space mapping (Peter Daifuku)
     - Implement resource hierarchy (Thierry Reding)
     - Clear CLKREQ# enable on port disable (Thierry Reding)
     - Add Tegra124 support (Thierry Reding)
 
   ST Microelectronics SPEAr13xx
     - Pass config resource through reg property (Pratyush Anand)
 
   Synopsys DesignWare
     - Use NULL instead of false (Fabio Estevam)
     - Parse bus-range property from devicetree (Lucas Stach)
     - Use pci_create_root_bus() instead of pci_scan_root_bus() (Lucas Stach)
     - Remove pci_assign_unassigned_resources() (Lucas Stach)
     - Check private_data validity in single place (Lucas Stach)
     - Setup and clear exactly one MSI at a time (Lucas Stach)
     - Remove open-coded bitmap operations (Lucas Stach)
     - Fix configuration base address when using 'reg' (Minghuan Lian)
     - Fix IO resource end address calculation (Minghuan Lian)
     - Rename get_msi_data() to get_msi_addr() (Minghuan Lian)
     - Add get_msi_data() to pcie_host_ops (Minghuan Lian)
     - Add support for v3.65 hardware (Murali Karicheri)
     - Fold struct pcie_port_info into struct pcie_port (Pratyush Anand)
 
   TI Keystone
     - Add TI Keystone PCIe driver (Murali Karicheri)
     - Limit MRSS for all downstream devices (Murali Karicheri)
     - Assume controller is already in RC mode (Murali Karicheri)
     - Set device ID based on SoC to support multiple ports (Murali Karicheri)
 
   Xilinx AXI
     - Add Xilinx AXI PCIe driver (Srikanth Thokala)
     - Fix xilinx_pcie_assign_msi() return value test (Dan Carpenter)
 
   Miscellaneous
     - Clean up whitespace (Quentin Lambert)
     - Remove assignments from "if" conditions (Quentin Lambert)
     - Move PCI_VENDOR_ID_VMWARE to pci_ids.h (Francesco Ruggeri)
     - x86: Mark DMI tables as initialization data (Mathias Krause)
     - x86: Move __init annotation to the correct place (Mathias Krause)
     - x86: Mark constants of pci_mmcfg_nvidia_mcp55() as __initconst (Mathias Krause)
     - x86: Constify pci_mmcfg_probes[] array (Mathias Krause)
     - x86: Mark PCI BIOS initialization code as such (Mathias Krause)
     - Parenthesize PCI_DEVID and PCI_VPD_LRDT_ID parameters (Megan Kamiya)
     - Remove unnecessary variable in pci_add_dynid() (Tobias Klauser)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUNWmJAAoJEFmIoMA60/r8GncP/3uHRoBrnaF6pv+S1l1p3Fs/
 l1kKH91/IuAAU7VJX8pkNybFqx02topWmiVVXAzqvD01PcRLGCLjPbWl5h+y5/Ja
 CHZH33AwHAmm0kt4BrOSOeHTLJhAigly2zV3P4F8jRIgyaeMoGZ6Ko4tkQUpm21k
 +ohrOd4cxYkmzzCjKwsZZhKnyRNpae8FmTk3VQBPuN8DbhvFPrqo5/+GeAdSZTdS
 HZHpfl2HL4095aY7uBVsZqNkjQyl6SnWwjkjLnuI8q3qA3BLgDZE/Jr8F/MNuW1V
 y01JIjerFWMDFyBIkpg7moYnODy6oP3KvczwYdKGmqsJja+0MQvYhLTwD+R/yTQS
 SewJA0mL3T3EJEfnFYkCiaIX27xIwk/FxHfaKPN91xgx/QM7xCVZNrU2/dXjhoX1
 GqLKxOEaFHhWWTyT5Dj27I0ZcElzFZ3tIwvrHfs8y22oAuAlsAypaUgvUwRfL4CO
 hOj4ITZa0t041sYWqxCoGAA9Fdp8HMzNKKS5F4mhADz4Ad9v6uPCNv/s/RoxVsbm
 jhZOtPYJ0/iCA+kNVX563S8Z3VpfPI+7bBjcj2WKdzW+IlICvOKT+kvwL2Tv/rE7
 w0hrNsbkgGsYbPldMx7LwCavsUtYFuNj0zoU6vkhP2jk6O2Tn5VXDmjrXH0v3iHI
 v03vlUtre0bQ26fzDyLQ
 =4Zv1
 -----END PGP SIGNATURE-----

Merge tag 'pci-v3.18-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci

Pull PCI updates from Bjorn Helgaas:
 "The interesting things here are:

   - Turn on Config Request Retry Status Software Visibility.  This
     caused hangs last time, but we included a fix this time.
   - Rework PCI device configuration to use _HPP/_HPX more aggressively
   - Allow PCI devices to be put into D3cold during system suspend
   - Add arm64 PCI support
   - Add APM X-Gene host bridge driver
   - Add TI Keystone host bridge driver
   - Add Xilinx AXI host bridge driver

  More detailed summary:

  Enumeration
    - Check Vendor ID only for Config Request Retry Status (Rajat Jain)
    - Enable Config Request Retry Status when supported (Rajat Jain)
    - Add generic domain handling (Catalin Marinas)
    - Generate uppercase hex for modalias interface class (Ricardo Ribalda Delgado)

  Resource management
    - Add missing MEM_64 mask in pci_assign_unassigned_bridge_resources() (Yinghai Lu)
    - Increase IBM ipr SAS Crocodile BARs to at least system page size (Douglas Lehr)

  PCI device hotplug
    - Prevent NULL dereference during pciehp probe (Andreas Noever)
    - Move _HPP & _HPX handling into core (Bjorn Helgaas)
    - Apply _HPP to PCIe devices as well as PCI (Bjorn Helgaas)
    - Apply _HPP/_HPX to display devices (Bjorn Helgaas)
    - Preserve SERR & PARITY settings when applying _HPP/_HPX (Bjorn Helgaas)
    - Preserve MPS and MRRS settings when applying _HPP/_HPX (Bjorn Helgaas)
    - Apply _HPP/_HPX to all devices, not just hot-added ones (Bjorn Helgaas)
    - Fix wait time in pciehp timeout message (Yinghai Lu)
    - Add more pciehp Slot Control debug output (Yinghai Lu)
    - Stop disabling pciehp notifications during init (Yinghai Lu)

  MSI
    - Remove arch_msi_check_device() (Alexander Gordeev)
    - Rename pci_msi_check_device() to pci_msi_supported() (Alexander Gordeev)
    - Move D0 check into pci_msi_check_device() (Alexander Gordeev)
    - Remove unused kobject from struct msi_desc (Yijing Wang)
    - Remove "pos" from the struct msi_desc msi_attrib (Yijing Wang)
    - Add "msi_bus" sysfs MSI/MSI-X control for endpoints (Yijing Wang)
    - Use __get_cached_msi_msg() instead of get_cached_msi_msg() (Yijing Wang)
    - Use __read_msi_msg() instead of read_msi_msg() (Yijing Wang)
    - Use __write_msi_msg() instead of write_msi_msg() (Yijing Wang)

  Power management
    - Drop unused runtime PM support code for PCIe ports (Rafael J.  Wysocki)
    - Allow PCI devices to be put into D3cold during system suspend (Rafael J. Wysocki)

  AER
    - Add additional AER error strings (Gong Chen)
    - Make <linux/aer.h> standalone includable (Thierry Reding)

  Virtualization
    - Add ACS quirk for Solarflare SFC9120 & SFC9140 (Alex Williamson)
    - Add ACS quirk for Intel 10G NICs (Alex Williamson)
    - Add ACS quirk for AMD A88X southbridge (Marti Raudsepp)
    - Remove unused pci_find_upstream_pcie_bridge(), pci_get_dma_source() (Alex Williamson)
    - Add device flag helpers (Ethan Zhao)
    - Assume all Mellanox devices have broken INTx masking (Gavin Shan)

  Generic host bridge driver
    - Fix ioport_map() for !CONFIG_GENERIC_IOMAP (Liviu Dudau)
    - Add pci_register_io_range() and pci_pio_to_address() (Liviu Dudau)
    - Define PCI_IOBASE as the base of virtual PCI IO space (Liviu Dudau)
    - Fix the conversion of IO ranges into IO resources (Liviu Dudau)
    - Add pci_get_new_domain_nr() and of_get_pci_domain_nr() (Liviu Dudau)
    - Add support for parsing PCI host bridge resources from DT (Liviu Dudau)
    - Add pci_remap_iospace() to map bus I/O resources (Liviu Dudau)
    - Add arm64 architectural support for PCI (Liviu Dudau)

  APM X-Gene
    - Add APM X-Gene PCIe driver (Tanmay Inamdar)
    - Add arm64 DT APM X-Gene PCIe device tree nodes (Tanmay Inamdar)

  Freescale i.MX6
    - Probe in module_init(), not fs_initcall() (Lucas Stach)
    - Delay enabling reference clock for SS until it stabilizes (Tim Harvey)

  Marvell MVEBU
    - Fix uninitialized variable in mvebu_get_tgt_attr() (Thomas Petazzoni)

  NVIDIA Tegra
    - Make sure the PCIe PLL is really reset (Eric Yuen)
    - Add error path tegra_msi_teardown_irq() cleanup (Jisheng Zhang)
    - Fix extended configuration space mapping (Peter Daifuku)
    - Implement resource hierarchy (Thierry Reding)
    - Clear CLKREQ# enable on port disable (Thierry Reding)
    - Add Tegra124 support (Thierry Reding)

  ST Microelectronics SPEAr13xx
    - Pass config resource through reg property (Pratyush Anand)

  Synopsys DesignWare
    - Use NULL instead of false (Fabio Estevam)
    - Parse bus-range property from devicetree (Lucas Stach)
    - Use pci_create_root_bus() instead of pci_scan_root_bus() (Lucas Stach)
    - Remove pci_assign_unassigned_resources() (Lucas Stach)
    - Check private_data validity in single place (Lucas Stach)
    - Setup and clear exactly one MSI at a time (Lucas Stach)
    - Remove open-coded bitmap operations (Lucas Stach)
    - Fix configuration base address when using 'reg' (Minghuan Lian)
    - Fix IO resource end address calculation (Minghuan Lian)
    - Rename get_msi_data() to get_msi_addr() (Minghuan Lian)
    - Add get_msi_data() to pcie_host_ops (Minghuan Lian)
    - Add support for v3.65 hardware (Murali Karicheri)
    - Fold struct pcie_port_info into struct pcie_port (Pratyush Anand)

  TI Keystone
    - Add TI Keystone PCIe driver (Murali Karicheri)
    - Limit MRSS for all downstream devices (Murali Karicheri)
    - Assume controller is already in RC mode (Murali Karicheri)
    - Set device ID based on SoC to support multiple ports (Murali Karicheri)

  Xilinx AXI
    - Add Xilinx AXI PCIe driver (Srikanth Thokala)
    - Fix xilinx_pcie_assign_msi() return value test (Dan Carpenter)

  Miscellaneous
    - Clean up whitespace (Quentin Lambert)
    - Remove assignments from "if" conditions (Quentin Lambert)
    - Move PCI_VENDOR_ID_VMWARE to pci_ids.h (Francesco Ruggeri)
    - x86: Mark DMI tables as initialization data (Mathias Krause)
    - x86: Move __init annotation to the correct place (Mathias Krause)
    - x86: Mark constants of pci_mmcfg_nvidia_mcp55() as __initconst (Mathias Krause)
    - x86: Constify pci_mmcfg_probes[] array (Mathias Krause)
    - x86: Mark PCI BIOS initialization code as such (Mathias Krause)
    - Parenthesize PCI_DEVID and PCI_VPD_LRDT_ID parameters (Megan Kamiya)
    - Remove unnecessary variable in pci_add_dynid() (Tobias Klauser)"

* tag 'pci-v3.18-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (109 commits)
  arm64: dts: Add APM X-Gene PCIe device tree nodes
  PCI: Add ACS quirk for AMD A88X southbridge devices
  PCI: xgene: Add APM X-Gene PCIe driver
  PCI: designware: Remove open-coded bitmap operations
  PCI/MSI: Remove unnecessary temporary variable
  PCI/MSI: Use __write_msi_msg() instead of write_msi_msg()
  MSI/powerpc: Use __read_msi_msg() instead of read_msi_msg()
  PCI/MSI: Use __get_cached_msi_msg() instead of get_cached_msi_msg()
  PCI/MSI: Add "msi_bus" sysfs MSI/MSI-X control for endpoints
  PCI/MSI: Remove "pos" from the struct msi_desc msi_attrib
  PCI/MSI: Remove unused kobject from struct msi_desc
  PCI/MSI: Rename pci_msi_check_device() to pci_msi_supported()
  PCI/MSI: Move D0 check into pci_msi_check_device()
  PCI/MSI: Remove arch_msi_check_device()
  irqchip: armada-370-xp: Remove arch_msi_check_device()
  PCI/MSI/PPC: Remove arch_msi_check_device()
  arm64: Add architectural support for PCI
  PCI: Add pci_remap_iospace() to map bus I/O resources
  of/pci: Add support for parsing PCI host bridge resources from DT
  of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()
  ...

Conflicts:
	arch/arm64/boot/dts/apm-storm.dtsi
2014-10-09 15:03:49 -04:00
Linus Torvalds ea584595fc This is the bulk of GPIO changes for the v3.18 development
cycle:
 
 - Increase the default ARCH_NR_GPIO from 256 to 512. This
   was done to avoid having a custom <asm/gpio.h> header for
   the x86 architecture - GPIO is custom and complicated
   enough as it is already! We want to move to a radix to
   store the descriptors going forward, and finally get rid
   of this fixed array size altogether.
 
 - Endgame patching of the gpio_remove() semantics initiated
   by Abdoulaye Berthe. It is not accepted by the system that
   the removal of a GPIO chip fails during e.g. reboot or
   shutdown, and therefore the return value has now painfully
   been refactored away. For special cases like GPIO expanders
   on a hot-pluggable bus like USB, we may later add some
   gpiochip_try_remove() call, but for the cases we have now,
   return values are moot.
 
 - Some incremental refactoring of the gpiolib core and ACPI
   GPIO library for more descriptor usage.
 
 - Refactor the chained IRQ handler set-up method to handle
   also threaded, nested interrupts and set up the parent IRQ
   correctly. Switch STMPE and TC3589x drivers to use this
   registration method.
 
 - Add a .irq_not_threaded flag to the struct gpio_chip, so
   that also GPIO expanders that block but are still not
   using threaded IRQ handlers.
 
 - New drivers for the ARM64 X-Gene SoC GPIO controller.
 
 - The syscon GPIO driver has been improved to handle the
   "DSP GPIO" found on the TI Keystone 2 SoC:s.
 
 - ADNP driver switched to use gpiolib irqchip helpers.
 
 - Refactor the DWAPB driver to support being instantiated
   from and MFD cell (platform device).
 
 - Incremental feature improvement in the Zynq, MCP23S08,
   DWAPB, OMAP, Xilinx and Crystalcove drivers.
 
 - Various minor fixes.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUNOr0AAoJEEEQszewGV1z9toP/2ISXRnsi3+jlqVmEGm/y6EA
 PPwJOiYnOhZR2/fTCHIF0PNbIi9pw7xKnzxttYCu4uCz7geHX+FfTwUZ2/KWMfqi
 ZJ9kEoOVVKzKjmL/m2a2tO4IRSBHqJ8dF3yvaNjS3AL7EDfG6F5STErQurdLEynK
 SeJZ2OwM/vRFCac6F7oDlqAUTu3xYGbVD8+zI0H0V/ReocosFlEwcbl2S8ctDWUd
 h98M+gY+A8rxkvVMnmQ/k7rUTme/glDQ3z5xVx+uHbS2/a5M1jSM/71cXE6YnSrR
 it0CK7CHomq2RzHsKf7oH7GD4kFkukMwFKeMoqz75JWz3352VZPTF53chCIqRSgO
 hrgGwZ7WF6pUUUhsn1ZdZsnBPA2Fou2uwslyLSAiE+OYEH2/NSVIOUcorjQcWqU/
 0Kix5yb8X1ZzRMhR+TVrTD5V0jguqp2buXq+0P2XlU6MoO2vy7iNf2eXvPg8sF8C
 anjTCKgmkzy7eyT2uzfDaNZAyfSBKb1TiKiR9zA0SRChJkCi1ErJEXDGeHiptvSA
 +D2k68Ils2LqsvdrnEd2XvVFMllh0iq7b+16o7D+Els0WRbnHpfYCaqfOuF5F4U0
 SmeyI0ruawNDc5e9EBKXstt0/R9AMOetyTcTu29U2ZVo90zGaT1ofT8+R1jJ0kGa
 bPARJZrgecgv1E9Qnnnd
 =8InA
 -----END PGP SIGNATURE-----

Merge tag 'gpio-v3.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio

Pull GPIO changes from Linus Walleij:
 "This is the bulk of GPIO changes for the v3.18 development cycle:

   - Increase the default ARCH_NR_GPIO from 256 to 512.  This was done
     to avoid having a custom <asm/gpio.h> header for the x86
     architecture - GPIO is custom and complicated enough as it is
     already! We want to move to a radix to store the descriptors going
     forward, and finally get rid of this fixed array size altogether.

   - Endgame patching of the gpio_remove() semantics initiated by
     Abdoulaye Berthe.  It is not accepted by the system that the
     removal of a GPIO chip fails during eg reboot or shutdown, and
     therefore the return value has now painfully been refactored away.
     For special cases like GPIO expanders on a hot-pluggable bus like
     USB, we may later add some gpiochip_try_remove() call, but for the
     cases we have now, return values are moot.

   - Some incremental refactoring of the gpiolib core and ACPI GPIO
     library for more descriptor usage.

   - Refactor the chained IRQ handler set-up method to handle also
     threaded, nested interrupts and set up the parent IRQ correctly.
     Switch STMPE and TC3589x drivers to use this registration method.

   - Add a .irq_not_threaded flag to the struct gpio_chip, so that also
     GPIO expanders that block but are still not using threaded IRQ
     handlers.

   - New drivers for the ARM64 X-Gene SoC GPIO controller.

   - The syscon GPIO driver has been improved to handle the "DSP GPIO"
     found on the TI Keystone 2 SoC:s.

   - ADNP driver switched to use gpiolib irqchip helpers.

   - Refactor the DWAPB driver to support being instantiated from and
     MFD cell (platform device).

   - Incremental feature improvement in the Zynq, MCP23S08, DWAPB, OMAP,
     Xilinx and Crystalcove drivers.

   - Various minor fixes"

* tag 'gpio-v3.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio: (52 commits)
  gpio: pch: Build context save/restore only for PM
  pinctrl: abx500: get rid of unused variable
  gpio: ks8695: fix 'else should follow close brace '}''
  gpio: stmpe: add verbose debug code
  gpio: stmpe: fix up interrupt enable logic
  gpio: staticize xway_stp_init()
  gpio: handle also nested irqchips in the chained handler set-up
  gpio: set parent irq on chained handlers
  gpiolib: irqchip: use irq_find_mapping while removing irqchip
  gpio: crystalcove: support virtual GPIO
  pinctrl: bcm281xx: make Kconfig dependency more strict
  gpio: kona: enable only on BCM_MOBILE or for compile testing
  gpio, bcm-kona, LLVMLinux: Remove use of __initconst
  gpio: Fix ngpio in gpio-xilinx driver
  gpio: dwapb: fix pointer to integer cast
  gpio: xgene: Remove unneeded #ifdef CONFIG_OF guard
  gpio: xgene: Remove unneeded forward declation for struct xgene_gpio
  gpio: xgene: Fix missing spin_lock_init()
  gpio: ks8695: fix switch case indentation
  gpiolib: add irq_not_threaded flag to gpio_chip
  ...
2014-10-09 14:58:15 -04:00
Mike Snitzer b8839b8c55 block: fix alignment_offset math that assumes io_min is a power-of-2
The math in both blk_stack_limits() and queue_limit_alignment_offset()
assume that a block device's io_min (aka minimum_io_size) is always a
power-of-2.  Fix the math such that it works for non-power-of-2 io_min.

This issue (of alignment_offset != 0) became apparent when testing
dm-thinp with a thinp blocksize that matches a RAID6 stripesize of
1280K.  Commit fdfb4c8c1 ("dm thin: set minimum_io_size to pool's data
block size") unlocked the potential for alignment_offset != 0 due to
the dm-thin-pool's io_min possibly being a non-power-of-2.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-10-09 09:41:40 -06:00
Linus Torvalds 782d59c5df Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq updates from Thomas Gleixner:
 "The irq departement delivers:

   - a cleanup series to get rid of mindlessly copied code.

   - another bunch of new pointlessly different interrupt chip drivers.

     Adding homebrewn irq chips (and timers) to SoCs must provide a
     value add which is beyond the imagination of mere mortals.

   - the usual SoC irq controller updates, IOW my second cat herding
     project"

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  irqchip: gic-v3: Implement CPU PM notifier
  irqchip: gic-v3: Refactor gic_enable_redist to support both enabling and disabling
  irqchip: renesas-intc-irqpin: Add minimal runtime PM support
  irqchip: renesas-intc-irqpin: Add helper variable dev = &pdev->dev
  irqchip: atmel-aic5: Add sama5d4 support
  irqchip: atmel-aic5: The sama5d3 has 48 IRQs
  Documentation: bcm7120-l2: Add Broadcom BCM7120-style L2 binding
  irqchip: bcm7120-l2: Add Broadcom BCM7120-style Level 2 interrupt controller
  irqchip: renesas-irqc: Add binding docs for new R-Car Gen2 SoCs
  irqchip: renesas-irqc: Add DT binding documentation
  irqchip: renesas-intc-irqpin: Document SoC-specific bindings
  openrisc: Get rid of handle_IRQ
  arm64: Get rid of handle_IRQ
  ARM: omap2: irq: Convert to handle_domain_irq
  ARM: imx: tzic: Convert to handle_domain_irq
  ARM: imx: avic: Convert to handle_domain_irq
  irqchip: or1k-pic: Convert to handle_domain_irq
  irqchip: atmel-aic5: Convert to handle_domain_irq
  irqchip: atmel-aic: Convert to handle_domain_irq
  irqchip: gic-v3: Convert to handle_domain_irq
  ...
2014-10-09 06:42:04 -04:00
Linus Torvalds afa3536be8 Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fixes from Ingo Molnar:
 "Main changes:

  - Fix the deadlock reported by Dave Jones et al
  - Clean up and fix nohz_full interaction with arch abilities
  - nohz init code consolidation/cleanup"

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  nohz: nohz full depends on irq work self IPI support
  nohz: Consolidate nohz full init code
  arm64: Tell irq work about self IPI support
  arm: Tell irq work about self IPI support
  x86: Tell irq work about self IPI support
  irq_work: Force raised irq work to run on irq work interrupt
  irq_work: Introduce arch_irq_work_has_interrupt()
  nohz: Move nohz full init call to tick init
2014-10-09 06:30:57 -04:00
Maarten ter Huurne 33ac9dba85 fonts: Add 6x10 font
This font is suitable for framebuffer consoles on devices with a
320x240 screen, to get a reasonable number of characters (53x24) that
are still at a readable size.

The font is derived from the existing 6x11 font, but gets 3 extra
lines without sacrificing readability. Also I redesigned a some glyhps
so they are more distinct and better fill the available space.

Signed-off-by: Maarten ter Huurne <maarten@treewalker.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
2014-10-09 11:35:48 +03:00
Martin Schwidefsky fe0f49768d s390/nohz: use a per-cpu flag for arch_needs_cpu
Move the nohz_delay bit from the s390_idle data structure to the
per-cpu flags. Clear the nohz delay flag in __cpu_disable and
remove the cpu hotplug notifier that used to do this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2014-10-09 09:14:02 +02:00
Sagi Grimberg 142537f4e5 IB/mlx5: Use extended internal signature layout
Rather than using the basic BSF layout which utilizes a pre-configured
signature settings (sufficient for current DIF implementation), we use
the extended BSF layout to expose advanced signature settings. These
settings will also be exposed to the user later.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-10-09 00:10:53 -07:00
Sagi Grimberg fd22f78cf7 IB/mlx5: Use enumerations for PI copy mask
In case input and output space parameters match, we can use a copy
mask from input and output space.  Use enums for those.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
2014-10-09 00:10:53 -07:00
Seunghun Lee 5e6123f347 vfs: move getname() from callers to do_mount()
It would make more sense to pass char __user * instead of
char * in callers of do_mount() and do getname() inside do_mount().

Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Seunghun Lee <waydi1@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-10-09 02:39:16 -04:00
Matthew Wilcox c35e024800 Add copy_to_iter(), copy_from_iter() and iov_iter_zero()
For DAX, we want to be able to copy between iovecs and kernel addresses
that don't necessarily have a struct page.  This is a fairly simple
rearrangement for bvec iters to kmap the pages outside and pass them in,
but for user iovecs it gets more complicated because we might try various
different ways to kmap the memory.  Duplicating the existing logic works
out best in this case.

We need to be able to write zeroes to an iovec for reads from unwritten
ranges in a file.  This is performed by the new iov_iter_zero() function,
again patterned after the existing code that handles iovec iterators.

[AV: and export the buggers...]

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-10-09 02:39:03 -04:00
Al Viro 1fa97e8b1f constify file_inode()
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-10-09 02:39:00 -04:00
Eric W. Biederman 5542aa2fa7 vfs: Make d_invalidate return void
Now that d_invalidate can no longer fail, stop returning a useless
return code.  For the few callers that checked the return code update
remove the handling of d_invalidate failure.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-10-09 02:38:57 -04:00
Eric W. Biederman 1ffe46d11c vfs: Merge check_submounts_and_drop and d_invalidate
Now that d_invalidate is the only caller of check_submounts_and_drop,
expand check_submounts_and_drop inline in d_invalidate.

Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-10-09 02:38:57 -04:00
Linus Torvalds 35a9ad8af0 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Most notable changes in here:

   1) By far the biggest accomplishment, thanks to a large range of
      contributors, is the addition of multi-send for transmit.  This is
      the result of discussions back in Chicago, and the hard work of
      several individuals.

      Now, when the ->ndo_start_xmit() method of a driver sees
      skb->xmit_more as true, it can choose to defer the doorbell
      telling the driver to start processing the new TX queue entires.

      skb->xmit_more means that the generic networking is guaranteed to
      call the driver immediately with another SKB to send.

      There is logic added to the qdisc layer to dequeue multiple
      packets at a time, and the handling mis-predicted offloads in
      software is now done with no locks held.

      Finally, pktgen is extended to have a "burst" parameter that can
      be used to test a multi-send implementation.

      Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4,
      virtio_net

      Adding support is almost trivial, so export more drivers to
      support this optimization soon.

      I want to thank, in no particular or implied order, Jesper
      Dangaard Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal
      Hadi Salim, John Fastabend, Florian Westphal, Daniel Borkmann,
      David Tat, Hannes Frederic Sowa, and Rusty Russell.

   2) PTP and timestamping support in bnx2x, from Michal Kalderon.

   3) Allow adjusting the rx_copybreak threshold for a driver via
      ethtool, and add rx_copybreak support to enic driver.  From
      Govindarajulu Varadarajan.

   4) Significant enhancements to the generic PHY layer and the bcm7xxx
      driver in particular (EEE support, auto power down, etc.) from
      Florian Fainelli.

   5) Allow raw buffers to be used for flow dissection, allowing drivers
      to determine the optimal "linear pull" size for devices that DMA
      into pools of pages.  The objective is to get exactly the
      necessary amount of headers into the linear SKB area pre-pulled,
      but no more.  The new interface drivers use is eth_get_headlen().
      From WANG Cong, with driver conversions (several had their own
      by-hand duplicated implementations) by Alexander Duyck and Eric
      Dumazet.

   6) Support checksumming more smoothly and efficiently for
      encapsulations, and add "foo over UDP" facility.  From Tom
      Herbert.

   7) Add Broadcom SF2 switch driver to DSA layer, from Florian
      Fainelli.

   8) eBPF now can load programs via a system call and has an extensive
      testsuite.  Alexei Starovoitov and Daniel Borkmann.

   9) Major overhaul of the packet scheduler to use RCU in several major
      areas such as the classifiers and rate estimators.  From John
      Fastabend.

  10) Add driver for Intel FM10000 Ethernet Switch, from Alexander
      Duyck.

  11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric
      Dumazet.

  12) Add Datacenter TCP congestion control algorithm support, From
      Florian Westphal.

  13) Reorganize sk_buff so that __copy_skb_header() is significantly
      faster.  From Eric Dumazet"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits)
  netlabel: directly return netlbl_unlabel_genl_init()
  net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers
  net: description of dma_cookie cause make xmldocs warning
  cxgb4: clean up a type issue
  cxgb4: potential shift wrapping bug
  i40e: skb->xmit_more support
  net: fs_enet: Add NAPI TX
  net: fs_enet: Remove non NAPI RX
  r8169:add support for RTL8168EP
  net_sched: copy exts->type in tcf_exts_change()
  wimax: convert printk to pr_foo()
  af_unix: remove 0 assignment on static
  ipv6: Do not warn for informational ICMP messages, regardless of type.
  Update Intel Ethernet Driver maintainers list
  bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING
  tipc: fix bug in multicast congestion handling
  net: better IFF_XMIT_DST_RELEASE support
  net/mlx4_en: remove NETDEV_TX_BUSY
  3c59x: fix bad split of cpu_to_le32(pci_map_single())
  net: bcmgenet: fix Tx ring priority programming
  ...
2014-10-08 21:40:54 -04:00
Linus Torvalds 8b45bc892e ARM: SoC driver updates for 3.18
These are changes for drivers that are intimately tied to some SoC
 and for some reason could not get merged through the respective
 subsystem maintainer tree.
 
 Most of the new code is for the Keystone Navigator driver, which is
 new base support that is going to be needed for their hardware
 accelerated network driver and other units.
 
 Most of the commits are for moving old code around from at91 and omap
 for things that are done in device drivers nowadays.
 
 - at91: move reset, poweroff, memory and clocksource code into drivers
   directories
 - socfpga: add edac driver (through arm-soc, as requested by Boris)
 - omap: move omap-intc code to drivers/irqchip
 - sunxi: added an RTC driver for sun6i
 - omap: mailbox driver related changes
 - keystone: support for the "Navigator" component
 - versatile: new reboot, led and soc drivers
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIVAwUAVDWWQGCrR//JCVInAQKX7Q//bDkoseKCZsGaXN7vfQ2YhT3SAc52mROV
 YQKdNmtMUrHqDgngATZTx5ogOh1hInnqueFjGGhfMYsHQO1Vj8+odj0r+4jhjuUY
 3YfY+qZ+91tq33JlUOhKn+mfVMdxJc8XarGgR6MSWYkqWVYCtLtBluum7hKm2UJ6
 /e4hd2zzImX5ATwj/LXWLx5eTf1qAVFGWzNUph1DrW+1V5lOu58X4gKwk1QOCVEh
 Pa0GV9oRTkjoswwz9drzjeFtie2yofQ2mygj6QKxg5NsosIF0+B8kJ61Sxwg56Ak
 tF+qn1hGtB2cDQkpxK4o2cZgCELhkh5Aqgol/vZUS1DMBSUEGCV9PPp2eOW83r3B
 0zsTgsShyVcTh7khdpQmHNRigvcc7e69LaAGC4o/RxaZpCU/LUNCQ+/iqVExSE8A
 VNEXr+JNxGxhj3m9KUHuEktdWx1oNvaYR8Rr4RPr6EWR8R6emJ04I7kXInvzhJZL
 HOGh75vSuAU83FrsP8fFRLadoHNVDXylAs38BPfGEMngVpjvwJLgQ3+729CwW+Q4
 +xQXAKSwKfr8xA8eg6wBSbFcwnEW4QwRqFqQ5XPw7zTZkCZbiLtvn3JpI5bH5A5Q
 /d2D+M2vFbB7VbWJBM4etO95eNS/pfhqJhcQh4t0DjXjoW6WqLiHCxhEx8Ogfvop
 /4ckyGvtEOI=
 =POJD
 -----END PGP SIGNATURE-----

Merge tag 'drivers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc

Pull ARM SoC driver updates from Arnd Bergmann:
 "These are changes for drivers that are intimately tied to some SoC and
  for some reason could not get merged through the respective subsystem
  maintainer tree.

  Most of the new code is for the Keystone Navigator driver, which is
  new base support that is going to be needed for their hardware
  accelerated network driver and other units.

  Most of the commits are for moving old code around from at91 and omap
  for things that are done in device drivers nowadays.

   - at91: move reset, poweroff, memory and clocksource code into
     drivers directories
   - socfpga: add edac driver (through arm-soc, as requested by Boris)
   - omap: move omap-intc code to drivers/irqchip
   - sunxi: added an RTC driver for sun6i
   - omap: mailbox driver related changes
   - keystone: support for the "Navigator" component
   - versatile: new reboot, led and soc drivers"

* tag 'drivers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc: (92 commits)
  bus: arm-ccn: Fix spurious warning message
  leds: add device tree bindings for register bit LEDs
  soc: add driver for the ARM RealView
  power: reset: driver for the Versatile syscon reboot
  leds: add a driver for syscon-based LEDs
  drivers/soc: ti: fix build break with modules
  MAINTAINERS: Add Keystone Multicore Navigator drivers entry
  soc: ti: add Keystone Navigator DMA support
  Documentation: dt: soc: add Keystone Navigator DMA bindings
  soc: ti: add Keystone Navigator QMSS driver
  Documentation: dt: soc: add Keystone Navigator QMSS bindings
  rtc: sunxi: Depend on platforms sun4i/sun7i that actually have the rtc
  rtc: sun6i: Add sun6i RTC driver
  irqchip: omap-intc: remove unnecessary comments
  irqchip: omap-intc: correct maximum number or MIR registers
  irqchip: omap-intc: enable TURBO idle mode
  irqchip: omap-intc: enable IP protection
  irqchip: omap-intc: remove unnecesary of_address_to_resource() call
  irqchip: omap-intc: comment style cleanup
  irqchip: omap-intc: minor improvement to omap_irq_pending()
  ...
2014-10-08 17:37:16 -04:00
Linus Torvalds cf377ad7d4 ARM: SoC platform changes for 3.18
New and updated SoC support. Among the things new for this release are:
 
 - at91: Added support for the new SAMA5D4 SoC, following the earlier SAMA5D3
 - bcm: Added support for BCM63XX family of DSL SoCs
 - hisi: Added support for HiP04 server-class SoC
 - meson: Initial support for the Amlogic Meson6 (aka 8726MX) platform
 - shmobile: added support for new r8a7794 (R-Car E2) automotive SoC
 
 Noteworthy changes to existing SoC support are:
 
 - imx: convert i.MX1 to device tree
 - omap: lots of power management work
 - omap: base support to enable moving to standard UART driver
 - shmobile: lots of progress for multiplatform support, still ongoing
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIVAwUAVDWVHWCrR//JCVInAQJQVw/+NEfKWh6blDvLEWHpkmBtzdsT3s+r1wwb
 ATtvd1Q7RlOMEbzxc2J87tJ44yHb64mSPBbC4BCGuQsM5IIvM4potmBphl/XxLfd
 b8PNcI6nvLO+FZOcgon0JWmvVnt+vLGKPUWzURXSRjdrpVRg2qyRpW+nPBnvX4HP
 qyzlSskkYzKm7WJQrIV1K3yYwRLrVZdz4DuF340mSFy+4H+uci2Fw91HJ9lKKmPS
 24Klx2Q4n6wfg946WazWtz21HjEBuMzRCq0CGZrwcTJffRyMxa4iq/kqE3xGbPtN
 onuP1gmAM7UOMewEvc1ZLycY7JyZ3mhKnKduqS/QN2JLLQEY2v1iYFnEKP8mHnnw
 ax6RVi91PC2MSLZyPcRtsegSKB9l16I7H+C5pgTOMgsSaqxSG1JtV1qZl3uwhBnE
 GB45KHPvTFojrH2+CqneNTLET1ozKgwtuHkWTG61/puYeap/VlpRU2OWj2mQF2E0
 SiBzmlbUBpSqzjFgVGD4ywKAuVA/WpJtaOB7Qg26GL2QoNKrY/wsUCY8hU742+jE
 b/N6obGcpmjytLkFRHx+AbYc75DHXkPtF4CWawDeQFW30LUeixZJqewQ61a56QF8
 49DbO6J+sR0n3xlteD49QdQJzDCtKw3BV+VQaFRcxqVDq4LJAxtUHJZ7c3iyvzEi
 6Yt+PsqSP7Y=
 =ZHtj
 -----END PGP SIGNATURE-----

Merge tag 'soc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc

Pull ARM SoC platform changes from Arnd Bergmann:
 "New and updated SoC support.  Among the things new for this release
  are:

   - at91: Added support for the new SAMA5D4 SoC, following the earlier
     SAMA5D3
   - bcm: Added support for BCM63XX family of DSL SoCs
   - hisi: Added support for HiP04 server-class SoC
   - meson: Initial support for the Amlogic Meson6 (aka 8726MX) platform
   - shmobile: added support for new r8a7794 (R-Car E2) automotive SoC

  Noteworthy changes to existing SoC support are:

   - imx: convert i.MX1 to device tree
   - omap: lots of power management work
   - omap: base support to enable moving to standard UART driver
   - shmobile: lots of progress for multiplatform support, still
     ongoing"

* tag 'soc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc: (171 commits)
  ARM: hisi: depend on ARCH_MULTI_V7
  CNS3xxx: Fix debug UART.
  ARM: at91: fix nommu build regression
  ARM: meson: add basic support for MesonX SoCs
  ARM: meson: debug: add debug UART for earlyprintk support
  irq: Export handle_fasteoi_irq
  ARM: mediatek: Add earlyprintk support for mt6589
  ARM: hisi: Fix platmcpm compilation when ARMv6 is selected
  ARM: debug: fix alphanumerical order on debug uarts
  ARM: at91: document Atmel SMART compatibles
  ARM: at91: add sama5d4 support to sama5_defconfig
  ARM: at91: dt: add device tree file for SAMA5D4ek board
  ARM: at91: dt: add device tree file for SAMA5D4 SoC
  ARM: at91: SAMA5D4 SoC detection code and low level routines
  ARM: at91: introduce basic SAMA5D4 support
  clk: at91: add a driver for the h32mx clock
  ARM: pxa3xx: provide specific platform_devices for all ssp ports
  ARM: pxa: ssp: provide platform_device_id for PXA3xx
  ARM: OMAP4+: Remove static iotable mappings for SRAM
  ARM: OMAP4+: Move SRAM data to DT
  ...
2014-10-08 17:13:04 -04:00
Linus Torvalds 212fe84a6f ARM: SoC cleanups for 3.18
This time around, the cleanup branch contains mostly code removal. A number
 of board files for at91, imx and msm have become obsolete because of the
 DT conversion and are now ready to be removed. The OMAP platform has
 traditionally had its own DMA engine abstraction and as this is being
 phased out, a lot of the original code is now unused and can be removed
 as well.
 
 S3C24xx can be simplified now that the restart code is a proper device
 driver.
 
 Finally, a number of cleanups in shmobile are done to prepare for
 the addition of new code in other branches.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIVAwUAVDWVEmCrR//JCVInAQIt+Q/+P9ABxjC/IjbTi2PN1MtBaaiFZajMj9pA
 h6UCsS08sUiOlr+jjl623evQqYswRk4aoOoPS6AiOYt0xUlAJ7euoOvo82Rjc+jy
 Yc4kRx/l7KffhaFGL2zQ0iLax9BH3fkAU5+fOEkq4QWpXzCX9WKWG+x6QYNnuRxy
 Y2jhjH13s/0EKl4hCBBS8LfEWyKmaFBwzKIVTS5IyBPrmu3dGaQS+zM0O65fWY6a
 eLMlsCgnfre4doy60YlZSNLA6Wc9gdBgyVi4wD90PVVnvs/IiSuS/6QABozKRp2t
 p+OH8Apgb1W+y5RL9+3nvTAF/jbLRhu3P7/DJcpQcd35RSDUdqCvQnNyaz4j/P3i
 hsZ67gY/1gIeI09vRHnnZL2Z6whmWVk4bhY9j0bbEnAjvtizWxmJxbboOQQQ0rv6
 UO2oqJ9hN99tf8aKiKK9//DLAAVoRHgJSDxgcC10XcH6JZzACX/9BFNC0X23gWBp
 QygtHFA6kSPW243j26/KVOjP/eUkekEVn89nqVwI3jJI2pwAjylFVEH3kNAZ/R88
 J68V4nwkqrQyAHG/WQo6GDjl5NQ/JeIUHvaAfjMUvFOIiaotkxMrw56hLGk2n1ZN
 QxlnxQJX+w+VvcUhuzu0YPdZnb5AWRV2R13JAoYBCfwWviqBDXJxysTA//4OR29p
 JAqXJqzGync=
 =Af7T
 -----END PGP SIGNATURE-----

Merge tag 'cleanup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc

Pull ARM SoC cleanups from Arnd Bergmann:
 "This time around, the cleanup branch contains mostly code removal.  A
  number of board files for at91, imx and msm have become obsolete
  because of the DT conversion and are now ready to be removed.  The
  OMAP platform has traditionally had its own DMA engine abstraction and
  as this is being phased out, a lot of the original code is now unused
  and can be removed as well.

  S3C24xx can be simplified now that the restart code is a proper device
  driver.

  Finally, a number of cleanups in shmobile are done to prepare for the
  addition of new code in other branches"

* tag 'cleanup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc: (43 commits)
  ARM: at91: Remove the support for the RSI EWS board
  arm: mach-omap2: Convert pr_warning to pr_warn
  ARM: OMAP: Remove unused pieces of legacy DMA API
  ARM: at91: remove board file for Acme Systems Fox G20
  ARM: orion5x: Convert pr_warning to pr_warn
  ARM: S3C24XX: remove separate restart code
  ARM: EXYNOS: Do not calculate boot address twice
  ARM: sunxi: Remove sun4i reboot code from mach directory
  ARM: imx: Remove mach-mxt_td60 board file
  ARM: shmobile: armadillo800eva legacy: Use rmobile_add_devices_to_domains()
  ARM: shmobile: r8a7740: Clean up pm domain table
  ARM: shmobile: r8a7740: Use rmobile_add_devices_to_domains()
  ARM: shmobile: sh7372: Make domain_devices[] static __initdata
  ARM: shmobile: mackerel: Make domain_devices[] static __initdata
  clocksource: tcb_clksrc: sanitize IRQ request
  ARM: at91/tclib: mask interruptions at shutdown and probe
  ARM: at91/tclib: move initialization from alloc to probe
  ARM: at91/tclib: prefer using of devm_* functions
  ARM: clps711x: Switch CLPS711X subarch to use clk and clocksource driver
  ARM: shmobile: r8a7791 is now called "R-Car M2-W"
  ...
2014-10-08 17:06:53 -04:00
Eric Dumazet 535114539b net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers
Add two helpers so that drivers do not have to care of BQL being
available or not.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jim Davis <jim.epost@gmail.com>
Fixes: 29d40c9032 ("net/mlx4_en: Use prefetch in tx path")
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-08 16:08:04 -04:00
Masanari Iida 709c48b39e net: description of dma_cookie cause make xmldocs warning
In commit 7bced39751,
dma_cookie was removed from struct skbuff.
But the description of dma_cookie still exist.
So the "make xmldocs" output following warning.

Warning(.//include/linux/skbuff.h:609): Excess struct/union
/enum/typedef member 'dma_cookie' description in 'sk_buff'

Remove description of dma_cookie fix the symptom.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-08 16:08:04 -04:00
Andy Lutomirski 68939df1d7 Move Intel SNB device ids from sb_edac to pci_ids.h
The i2c_imc driver will use two of them, and moving only part of
the list seems messier.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
2014-10-08 17:04:16 -03:00
LEROY Christophe 583d4a6885 net: fs_enet: Remove non NAPI RX
In the probe function, use_napi is inconditionnaly set to 1. This patch removes
all the code which is conditional to !use_napi, and removes use_napi which has
then become useless.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-08 16:01:41 -04:00
Linus Torvalds da01e61428 Merge tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "This patch-set introduces a couple of new features such as large
  sector size, FITRIM, and atomic/volatile writes.

  Several patches enhance power-off recovery and checkpoint routines.

  The fsck.f2fs starts to support fixing corrupted partitions with
  recovery hints provided by this patch-set.

  Summary:
   - retain some recovery information for fsck.f2fs
   - enhance checkpoint speed
   - enhance flush command management
   - bug fix for lseek
   - tune in-place-update policies
   - enhance roll-forward speed
   - revisit all the roll-forward and fsync rules
   - support larget sector size
   - support FITRIM
   - support atomic and volatile writes

  And several clean-ups and bug fixes are included"

* tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (42 commits)
  f2fs: support volatile operations for transient data
  f2fs: support atomic writes
  f2fs: remove unused return value
  f2fs: clean up f2fs_ioctl functions
  f2fs: potential shift wrapping buf in f2fs_trim_fs()
  f2fs: call f2fs_unlock_op after error was handled
  f2fs: check the use of macros on block counts and addresses
  f2fs: refactor flush_nat_entries to remove costly reorganizing ops
  f2fs: introduce FITRIM in f2fs_ioctl
  f2fs: introduce cp_control structure
  f2fs: use more free segments until SSR is activated
  f2fs: change the ipu_policy option to enable combinations
  f2fs: fix to search whole dirty segmap when get_victim
  f2fs: fix to clean previous mount option when remount_fs
  f2fs: skip punching hole in special condition
  f2fs: support large sector size
  f2fs: fix to truncate blocks past EOF in ->setattr
  f2fs: update i_size when __allocate_data_block
  f2fs: use MAX_BIO_BLOCKS(sbi)
  f2fs: remove redundant operation during roll-forward recovery
  ...
2014-10-08 12:53:15 -04:00
Linus Torvalds 6dea0737bc Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
 "Highlights:

   - support the NFSv4.2 SEEK operation (allowing clients to support
     SEEK_HOLE/SEEK_DATA), thanks to Anna.
   - end the grace period early in a number of cases, mitigating a
     long-standing annoyance, thanks to Jeff
   - improve SMP scalability, thanks to Trond"

* 'for-3.18' of git://linux-nfs.org/~bfields/linux: (55 commits)
  nfsd: eliminate "to_delegation" define
  NFSD: Implement SEEK
  NFSD: Add generic v4.2 infrastructure
  svcrdma: advertise the correct max payload
  nfsd: introduce nfsd4_callback_ops
  nfsd: split nfsd4_callback initialization and use
  nfsd: introduce a generic nfsd4_cb
  nfsd: remove nfsd4_callback.cb_op
  nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence
  nfsd: fix nfsd4_cb_recall_done error handling
  nfsd4: clarify how grace period ends
  nfsd4: stop grace_time update at end of grace period
  nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients
  nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls
  nfsd: serialize nfsdcltrack upcalls for a particular client
  nfsd: pass extra info in env vars to upcalls to allow for early grace period end
  nfsd: add a v4_end_grace file to /proc/fs/nfsd
  lockd: add a /proc/fs/lockd/nlm_end_grace file
  nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE
  nfsd: remove redundant boot_time parm from grace_done client tracking op
  ...
2014-10-08 12:51:44 -04:00
Linus Torvalds 25641c0c8d NFS client updates for Linux 3.18
Highlights include:
 
 Stable fixes:
 - fix an NFSv4.1 state renewal regression
 - fix open/lock state recovery error handling
 - fix lock recovery when CREATE_SESSION/SETCLIENTID_CONFIRM fails
 - fix statd when reconnection fails
 - Don't wake tasks during connection abort
 - Don't start reboot recovery if lease check fails
 - fix duplicate proc entries
 
 Features:
 - pNFS block driver fixes and clean ups from Christoph
 - More code cleanups from Anna
 - Improve mmap() writeback performance
 - Replace use of PF_TRANS with a more generic mechanism for avoiding
   deadlocks in nfs_release_page
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUMpFYAAoJEGcL54qWCgDywHYP/A7XNykwOGhoHVP1Cgr3xqoz
 gVhAw97AEMZE8xSNVEGS++pJTe59JVzsIsYAwdHMwePV33l3zyzYorae6N9p7zWF
 0xVaNQ4qNLVhbrNLAoB5KA/c3/jMnNjF5t15+8akZad5pt4kXLlhSKjyVpdEEtJE
 A0eneXShMYEeLZoOJhpQt5bsw0OZ8YbWWEMjGlDqyeelvV3K1+zfivQOoyX6hS4w
 XFkPEDmU7zunE/xFP9ZoUaVdLO0TvOWfEZ7STWoHm7NuWfPQiDb9w1mTnuZbZyka
 ssezoGcitzwsjCcQ5e1iKTOoFRIsm/zYXFQgFQL7VFMBU1Tss9Of8047EyDkqcPF
 GxctsGg0gQ2FkG7yx7JH7AKpyibOIuByQrQQ916coWSf7K0L4H4Rcky3vryroylP
 1e1RI49xu215OTm+dLvlvYCv55bqCrTmaUGImZac18+ixD2eh6MNfW2ubSdxk89L
 U2rTFV09Bd52N7IQOGQx1FBEI2ZnIFUV4UaFz7v+rGFxOnk6+WYe+iWyb4wC70Yc
 8Jh/gTIQDd5aghql3FTieMOyfEvO6Re4pLMXmqEWMAevicx2t8DwkJriRu6X8Iy2
 rlDlBPwu5QmRWC20Dc897f0VajwDtwdeB8puod7nobOWzOfx4FrNqLJ+jR3pmHUk
 0otvJytqemXt+zkqqHKK
 =/OQi
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-3.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - fix an NFSv4.1 state renewal regression
   - fix open/lock state recovery error handling
   - fix lock recovery when CREATE_SESSION/SETCLIENTID_CONFIRM fails
   - fix statd when reconnection fails
   - don't wake tasks during connection abort
   - don't start reboot recovery if lease check fails
   - fix duplicate proc entries

  Features:
  - pNFS block driver fixes and clean ups from Christoph
  - More code cleanups from Anna
  - Improve mmap() writeback performance
  - Replace use of PF_TRANS with a more generic mechanism for avoiding
    deadlocks in nfs_release_page"

* tag 'nfs-for-3.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (66 commits)
  NFSv4.1: Fix an NFSv4.1 state renewal regression
  NFSv4: fix open/lock state recovery error handling
  NFSv4: Fix lock recovery when CREATE_SESSION/SETCLIENTID_CONFIRM fails
  NFS: Fabricate fscache server index key correctly
  SUNRPC: Add missing support for RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT
  NFSv3: Fix missing includes of nfs3_fs.h
  NFS/SUNRPC: Remove other deadlock-avoidance mechanisms in nfs_release_page()
  NFS: avoid waiting at all in nfs_release_page when congested.
  NFS: avoid deadlocks with loop-back mounted NFS filesystems.
  MM: export page_wakeup functions
  SCHED: add some "wait..on_bit...timeout()" interfaces.
  NFS: don't use STABLE writes during writeback.
  NFSv4: use exponential retry on NFS4ERR_DELAY for async requests.
  rpc: Add -EPERM processing for xs_udp_send_request()
  rpc: return sent and err from xs_sendpages()
  lockd: Try to reconnect if statd has moved
  SUNRPC: Don't wake tasks during connection abort
  Fixing lease renewal
  nfs: fix duplicate proc entries
  pnfs/blocklayout: Fix a 64-bit division/remainder issue in bl_map_stripe
  ...
2014-10-08 12:49:23 -04:00
Linus Torvalds ef0625b70d Char/Misc driver patches for 3.18-rc1
Here's the big set of driver patches for char/misc drivers.  Nothing
 major in here, the shortlog below goes into the details.  All have been
 in the linux-next tree for a while with no issues.
 
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iEYEABECAAYFAlQ0ZXYACgkQMUfUDdst+ymiEgCgrKcYUluvdrbjdkhrENk332YN
 lcUAoMzgQpbkYhswrDNQet7NtAbFN9LV
 =ZPDy
 -----END PGP SIGNATURE-----

Merge tag 'char-misc-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull char/misc driver updates from Greg KH:
 "Here's the big set of driver patches for char/misc drivers.  Nothing
  major in here, the shortlog goes into the details.  All have been in
  the linux-next tree for a while with no issues"

* tag 'char-misc-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (80 commits)
  mei: mei_txe_fw_sts can be static
  mei: fix kernel-doc warnings
  mei: fix KDoc documentation formatting
  mei: drop me_client_presentation_num
  mei: trivial: fix errors in prints in comments
  mei: remove include to pci header from mei module files
  mei: push pci cfg structure me hw
  mei: remove the reference to pdev from mei_device
  mei: move fw_status back to hw ops handlers
  mei: get rid of most of the pci dependencies in mei
  mei: push all standard settings into mei_device_init
  mei: move mei_hbm_hdr function from hbm.h the hbm.c
  mei: kill error message for allocation failure
  mei: nfc: fix style warning
  mei: fix style warning: Missing a blank line after declarations
  mei: pg: fix cat and paste error in comments
  mei: debugfs: add single buffer indicator
  mei: debugfs: adjust print buffer
  mei: add hbm and pg state in devstate debugfs print
  Drivers: hv: vmbus: Enable interrupt driven flow control
  ...
2014-10-08 06:55:41 -04:00
Linus Torvalds bca51651fc Driver core patches for 3.18-rc1
Here's the driver core patches for 3.18-rc1.  Just a few small things,
 and the addition of a new interface to dump firmware "core dumps" to
 userspace through sysfs that the wireless and graphic drivers want to
 use.
 
 All of these have been in linux-next for a while.
 
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iEYEABECAAYFAlQ0ZgwACgkQMUfUDdst+ymh9ACfZi5TG1AD8/EesCoKaoTd4yJZ
 QOcAnjISbF9IKL1ia1fESqFYyTO+XqrP
 =YdeX
 -----END PGP SIGNATURE-----

Merge tag 'driver-core-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core

Pull driver core update from Greg KH:
 "Here's the driver core patches for 3.18-rc1.  Just a few small things,
  and the addition of a new interface to dump firmware "core dumps" to
  userspace through sysfs that the wireless and graphic drivers want to
  use.

  All of these have been in linux-next for a while"

* tag 'driver-core-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core:
  dynamic_debug: change __dynamic_<foo>_dbg return types to void
  driver/base/node: remove unnecessary kfree of node struct from unregister_one_node
  devres: Improve devm_kasprintf()/kvasprintf() support
  Documentation: devres: Add missing devm_kstrdup() managed interface
  Documentation: devres: Add missing IRQ functions
  firmware_class: make sure fw requests contain a name
  driver core: Remove kerneldoc from local function
  attribute_container: fix coding style issues
  attribute_container: fix whitespace errors
  drivers/base: Fix length checks in create_syslog_header()/dev_vprintk_emit()
  device coredump: add new device coredump class
  Documentation/sysfs-rules.txt: Add device attribute error code documentation
2014-10-08 06:53:19 -04:00
Linus Torvalds 683a52a101 TTY/Serial driver patches for 3.18-rc1
Here's the big tty/serial driver patchset for 3.18-rc1.
 
 Lots of little things in here, some good work from Peter Hurley on the
 tty core, and in lots of drivers.  There are also lots of other driver
 updates in here as well, full details in the changelog below.
 
 All have been in the linux-next tree for a while.
 
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iEYEABECAAYFAlQ0aDwACgkQMUfUDdst+ymueACeI1i2exlGaBBSVQuUK2Jmx8Uz
 nukAn3KPuvvx+MKfMMBRpK0DQCzTxv4P
 =dwv1
 -----END PGP SIGNATURE-----

Merge tag 'tty-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty

Pull tty/serial driver updates from Greg KH:
 "Here's the big tty/serial driver patchset for 3.18-rc1.

  Lots of little things in here, some good work from Peter Hurley on the
  tty core, and in lots of drivers.  There are also lots of other driver
  updates in here as well, full details in the changelogs.

  All have been in the linux-next tree for a while"

* tag 'tty-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty: (99 commits)
  Revert "serial/core: Initialize the console pm state"
  tty: serial: 8250: use 32bit variable for rpm_tx_active
  tty: serial: msm: Add earlycon support
  serial/core: Initialize the console pm state
  serial: asc: Conditionally use readl_relaxed (COMPILE_TEST)
  serial: of-serial: add PM suspend/resume support
  m68k: AMIGA_BUILTIN_SERIAL should depend on TTY
  asm/uapi: Add definition of TIOC[SG]RS485
  tty/metag_da: Add console_poll module parameter
  serial: 8250_pci: remove rts_n override from Baytrail quirk
  serial: cadence: Add generic earlycon support
  serial: imx: change the wait even to interruptiable
  serial: imx: terminate the RX DMA when the UART is suspending
  serial: imx: fix throttle/unthrottle callbacks for hardware assisted flow control
  serial: 8250: Add Quark X1000 to 8250_pci.c
  tty: omap-serial: pull out calculation from baud_is_mode16
  tty: omap-serial: fix division by zero
  xen_hvc: no reason to write the type key on xenstore
  tty: serial: 8250_core: remove UART_IER_RDI in serial8250_stop_rx()
  tty: serial: 8250_core: use the ->line argument as a hint in serial8250_find_match_or_unused()
  ...
2014-10-08 06:52:11 -04:00
Linus Torvalds 463311960e USB patches for 3.18-rc1
Here's the big USB patchset for 3.18-rc1.  Also in here is the PHY tree,
 as it seems to fit well with the USB tree for various reasons...
 
 Anyway, lots of little changes in here, all over the place, full details
 in the changelog below.
 
 All have been in the linux-next tree for a while with no issues.
 
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iEYEABECAAYFAlQ0aLYACgkQMUfUDdst+ylBvwCgs9fGRj0RQkLyGhQdEpzdZtTU
 ZcwAoMPBImnaA1ZeSl7ZnoO8vC/WE4bR
 =tfpj
 -----END PGP SIGNATURE-----

Merge tag 'usb-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb

Pull USB updates from Greg KH:
 "Here's the big USB patchset for 3.18-rc1.  Also in here is the PHY
  tree, as it seems to fit well with the USB tree for various reasons...

  Anyway, lots of little changes in here, all over the place, full
  details in the changelog

  All have been in the linux-next tree for a while with no issues"

* tag 'usb-3.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb: (244 commits)
  USB: host: st: fix typo 'CONFIG_USB_EHCI_HCD_ST'
  uas: Reduce number of function arguments for uas_alloc_foo functions
  xhci: Allow xHCI drivers to be built as separate modules
  xhci: Export symbols used by host-controller drivers
  xhci: Check for XHCI_COMP_MODE_QUIRK when disabling D3cold
  xhci: Introduce xhci_init_driver()
  usb: hcd: add generic PHY support
  usb: rename phy to usb_phy in HCD
  usb: gadget: uvc: fix up uvcg_v4l2_get_unmapped_area typo
  USB: host: st: fix ehci/ohci driver selection
  usb: host: ehci-exynos: Remove unnecessary usb-phy support
  usb: core: return -ENOTSUPP for all targeted hosts
  USB: Remove .owner field for driver
  usb: core: log higher level message on malformed LANGID descriptor
  usb: Add LED triggers for USB activity
  usb: Rename usb-common.c
  usb: gadget: Refactor request completion
  usb: gadget: Introduce usb_gadget_giveback_request()
  usb: dwc2/gadget: move phy bus legth initialization
  phy: remove .owner field for drivers using module_platform_driver
  ...
2014-10-08 06:47:31 -04:00
Linus Torvalds 87d7bcee4f Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 - add multibuffer infrastructure (single_task_running scheduler helper,
   OKed by Peter on lkml.
 - add SHA1 multibuffer implementation for AVX2.
 - reenable "by8" AVX CTR optimisation after fixing counter overflow.
 - add APM X-Gene SoC RNG support.
 - SHA256/SHA512 now handles unaligned input correctly.
 - set lz4 decompressed length correctly.
 - fix algif socket buffer allocation failure for 64K page machines.
 - misc fixes

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (47 commits)
  crypto: sha - Handle unaligned input data in generic sha256 and sha512.
  Revert "crypto: aesni - disable "by8" AVX CTR optimization"
  crypto: aesni - remove unused defines in "by8" variant
  crypto: aesni - fix counter overflow handling in "by8" variant
  hwrng: printk replacement
  crypto: qat - Removed unneeded partial state
  crypto: qat - Fix typo in name of tasklet_struct
  crypto: caam - Dynamic allocation of addresses for various memory blocks in CAAM.
  crypto: mcryptd - Fix typos in CRYPTO_MCRYPTD description
  crypto: algif - avoid excessive use of socket buffer in skcipher
  arm64: dts: add random number generator dts node to APM X-Gene platform.
  Documentation: rng: Add X-Gene SoC RNG driver documentation
  hwrng: xgene - add support for APM X-Gene SoC RNG support
  crypto: mv_cesa - Add missing #define
  crypto: testmgr - add test for lz4 and lz4hc
  crypto: lz4,lz4hc - fix decompression
  crypto: qat - Use pci_enable_msix_exact() instead of pci_enable_msix()
  crypto: drbg - fix maximum value checks on 32 bit systems
  crypto: drbg - fix sparse warning for cpu_to_be[32|64]
  crypto: sha-mb - sha1_mb_alg_state can be static
  ...
2014-10-08 06:44:48 -04:00
Linus Torvalds e4e65676f2 Fixes and features for 3.18.
Apart from the usual cleanups, here is the summary of new features:
 
 - s390 moves closer towards host large page support
 
 - PowerPC has improved support for debugging (both inside the guest and
   via gdbstub) and support for e6500 processors
 
 - ARM/ARM64 support read-only memory (which is necessary to put firmware
   in emulated NOR flash)
 
 - x86 has the usual emulator fixes and nested virtualization improvements
   (including improved Windows support on Intel and Jailhouse hypervisor
   support on AMD), adaptive PLE which helps overcommitting of huge guests.
   Also included are some patches that make KVM more friendly to memory
   hot-unplug, and fixes for rare caching bugs.
 
 Two patches have trivial mm/ parts that were acked by Rik and Andrew.
 
 Note: I will soon switch to a subkey for signing purposes.  To verify
 future signed pull requests from me, please update my key with
 "gpg --recv-keys 9B4D86F2".  You should see 3 new subkeys---the
 one for signing will be a 2048-bit RSA key, 4E6B09D7.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJUL5sPAAoJEBvWZb6bTYbyfkEP/3MNhSyn6HCjPjtjLNPAl9KL
 WpExZSUFL2+4CztpdGIsek1BeJYHmqv3+c5S+WvaWVA1aqh2R7FT1D1ErBLjgLQq
 lq23IOr+XxmC3dXQUEEk+TlD+283UzypzEG4l4UD3JYg79fE3UrXAz82SeyewJDY
 x7aPYhkZG3RHu+wAyMPasG6E3zS5LySdUtGWbiPwz5BejrhBJoJdeb2WIL/RwnUK
 7ppSLB5EoFj/uMkuyeAAdAbdfSrhHA6faDZxNdxS9k9wGutrhhfUoQ49ONrKG4dV
 sFo1tSPTVgRs8QFYUZ2fJUPBAmUVddsgqh2K9d0NftGTq7b8YszaCsfFrs2/Y4MU
 YxssWEhxsfszerCu12bbAJrv6JBZYQ7TwGvI9L7P0iFU6IVw/djmukU4AkM9/e91
 YS/cue/PN+9Pn2ccXzL9J7xRtZb8FsOuRsCXTCmbOwDkLmrKPDBN2t3RUbeF+Eam
 ABrpWnLKX13kZSo4LKU+/niarzmPMp7odQfHVdr8ea0fiYLp4iN8puA20WaSPIgd
 CLvm+RAvXe5Lm91L4mpFotJ2uFyK6QlIYJV4FsgeWv/0D0qppWQi0Utb/aCNHCgy
 z8MyUMD48y7EpoQrFYr/7cddXIu0/NegnM8I1coVjIPEk4NfeebGUlCJ/V3D8wMG
 BgEfS2x6jRc5zB3hjwDr
 =iEVi
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "Fixes and features for 3.18.

  Apart from the usual cleanups, here is the summary of new features:

   - s390 moves closer towards host large page support

   - PowerPC has improved support for debugging (both inside the guest
     and via gdbstub) and support for e6500 processors

   - ARM/ARM64 support read-only memory (which is necessary to put
     firmware in emulated NOR flash)

   - x86 has the usual emulator fixes and nested virtualization
     improvements (including improved Windows support on Intel and
     Jailhouse hypervisor support on AMD), adaptive PLE which helps
     overcommitting of huge guests.  Also included are some patches that
     make KVM more friendly to memory hot-unplug, and fixes for rare
     caching bugs.

  Two patches have trivial mm/ parts that were acked by Rik and Andrew.

  Note: I will soon switch to a subkey for signing purposes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (157 commits)
  kvm: do not handle APIC access page if in-kernel irqchip is not in use
  KVM: s390: count vcpu wakeups in stat.halt_wakeup
  KVM: s390/facilities: allow TOD-CLOCK steering facility bit
  KVM: PPC: BOOK3S: HV: CMA: Reserve cma region only in hypervisor mode
  arm/arm64: KVM: Report correct FSC for unsupported fault types
  arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc
  kvm: Fix kvm_get_page_retry_io __gup retval check
  arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset
  kvm: x86: Unpin and remove kvm_arch->apic_access_page
  kvm: vmx: Implement set_apic_access_page_addr
  kvm: x86: Add request bit to reload APIC access page address
  kvm: Add arch specific mmu notifier for page invalidation
  kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request() and make it non-static
  kvm: Fix page ageing bugs
  kvm/x86/mmu: Pass gfn and level to rmapp callback.
  x86: kvm: use alternatives for VMCALL vs. VMMCALL if kernel text is read-only
  kvm: x86: use macros to compute bank MSRs
  KVM: x86: Remove debug assertion of non-PAE reserved bits
  kvm: don't take vcpu mutex for obviously invalid vcpu ioctls
  kvm: Faults which trigger IO release the mmap_sem
  ...
2014-10-08 05:27:39 -04:00
Jassi Brar 2b6d83e2b8 mailbox: Introduce framework for mailbox
Introduce common framework for client/protocol drivers and
controller drivers of Inter-Processor-Communication (IPC).

Client driver developers should have a look at
 include/linux/mailbox_client.h to understand the part of
the API exposed to client drivers.
Similarly controller driver developers should have a look
at include/linux/mailbox_controller.h

Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
2014-10-08 10:39:41 +05:30
Suman Anna f2fc42b6ac mailbox: rename pl320-ipc specific mailbox.h
The patch 30058677 "ARM / highbank: add support for pl320 IPC"
added a pl320 IPC specific header file as a generic mailbox.h.
This file has been renamed appropriately to allow the
introduction of the generic mailbox API framework.

Acked-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
2014-10-08 10:39:33 +05:30
Linus Torvalds 1e345ac686 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
Pull input updates from Dmitry Torokhov:
 "A few new haptic/button drivers, a rudimentary support for laptops
  using FocalTech touchpads; xpad driver will bind to more devices, and
  a few other driver fixes."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input:
  Input: soc_button_array - convert to platform bus
  Input: palmas-pwrbutton - fix typo in the license string
  Input: palmas-pwrbutton - use IRQF_ONESHOT
  Input: psmouse - add support for detecting FocalTech PS/2 touchpads
  Input: psmouse - add psmouse_matches_pnp_id helper function
  Input: joystick - use ktime for measuring timing
  Input: add haptic driver on max77693
  Input: introduce palmas-pwrbutton
  Input: add support for the DRV2667 haptic driver
  Input: xpad - sync device IDs with xboxdrv
  Input: xpad - add VID/PID for Razer Sabertooth
  Input: cros_ec_keyb - optimize ghosting algorithm
  Input: drv260x - fix binding document
  Input: drv260x - add check for ERM mode and LRA Libraries
  Input: drv260x - remove unused defines
  Input: drv260x - add TI drv260x haptics driver
2014-10-07 21:26:52 -04:00
Linus Torvalds 39520eea19 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
Pull HID updates from Jiri Kosina:

 - quirk for devices that need to be pulled in much more aggresive way
   than mandated, by Johan Hovold

 - robustification of sanity checking of incoming reports in RMI driver,
   by Benjamin Tissoires

 - fixes, updates, and new HW support to SONY driver, by Frank Praznik

 - port of uHID to the new transport layer layout, by David Herrmann

 - robustification of Clear-Halt/reset in USB HID, by Alan Stern

 - native support for hopefully any future HID compliant wacom tablet.
   Those found on the various laptops (ISDv4/5) already are HID
   compliant and they should work in the future without any modification
   of the kernel.  Written by Benjamin Tissoires.

 - a lot more simple fixes and device ID additions all over the place

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid: (45 commits)
  HID: uHID: fix excepted report type
  HID: usbhid: add another mouse that needs QUIRK_ALWAYS_POLL
  HID: wacom: implement the finger part of the HID generic handling
  HID: wacom: implement generic HID handling for pen generic devices
  HID: wacom: move allocation of inputs earlier
  HID: wacom: split out input allocation and registration
  HID: wacom: rename failN with some meaningful information
  HID: sony: Update the DualShock 4 touchpad resolution
  HID: wacom: fix timeout on probe for some wacoms
  HID: sony: Set touchpad bits in the input_configured callback
  HID: sony: Update file header and correct comments
  HID: sony: Corrections for the DualShock 4 HID descriptor
  HID: rmi: check sanity of the incoming report
  HID: wacom: make the WL connection friendly for the desktop
  HID: wacom - enable LED support for Wireless Intuos5/Pro
  HID: wacom - remove report_id from wacom_get_report interface
  HID: wacom - Clean up of sysfs
  HID: wacom - Add default permission defines for sysfs attributes
  HID: usbhid: fix PIXART optical mouse
  HID: Add Holtek USB ID 04d9:a0c2 ETEKCITY Scroll
  ...
2014-10-07 21:17:29 -04:00
Linus Torvalds 28596c9722 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial
Pull "trivial tree" updates from Jiri Kosina:
 "Usual pile from trivial tree everyone is so eagerly waiting for"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial: (39 commits)
  Remove MN10300_PROC_MN2WS0038
  mei: fix comments
  treewide: Fix typos in Kconfig
  kprobes: update jprobe_example.c for do_fork() change
  Documentation: change "&" to "and" in Documentation/applying-patches.txt
  Documentation: remove obsolete pcmcia-cs from Changes
  Documentation: update links in Changes
  Documentation: Docbook: Fix generated DocBook/kernel-api.xml
  score: Remove GENERIC_HAS_IOMAP
  gpio: fix 'CONFIG_GPIO_IRQCHIP' comments
  tty: doc: Fix grammar in serial/tty
  dma-debug: modify check_for_stack output
  treewide: fix errors in printk
  genirq: fix reference in devm_request_threaded_irq comment
  treewide: fix synchronize_rcu() in comments
  checkstack.pl: port to AArch64
  doc: queue-sysfs: minor fixes
  init/do_mounts: better syntax description
  MIPS: fix comment spelling
  powerpc/simpleboot: fix comment
  ...
2014-10-07 21:16:26 -04:00
Linus Torvalds d29010694c spi: Updates for v3.18
A quiet release for SPI, mainly driver updates and not too many of them:
 
  - Support for dummy transfers (for delays on startup) in drivers using
    transfer_one().
  - Lots of enhancements to the Designware driver to support new Intel
    SoCs.
  - Support for newer Renesas chips.
  - DMA support for the i.MX driver.
  - One new driver for Broadcom BCM53xx chips.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJUMrIGAAoJECTWi3JdVIfQOEkH/2wn08N0k9OC9kx3JleIABBP
 nRWq83eeUauwFP9Z+d3p/m1Ta6vhaU8tNR8HOa8bXo6GFB0H4uTbNyCv93lUSv1R
 mdiUR9uAnM3Kxlx2Am9JhiDl1yB4O0dreHQI/xsyX6PCbnFbwc6MirhomZ04sAG0
 4u2UsdENODNzeynUNH0cyysuFq830MtQibeSQAF0mc+gjlFDd1dxVGLmnEY0PC8L
 WfRZrIyellB2Ss3VR87BlBejTPVatyw9VoQTXuy2v67chC/eZxudabaneq317DBi
 Bxclv3eF3tZNZJa+6OyU+xTuwQsam51lcK7znZJEyaJYPltj/AvUdWy/8afjcj4=
 =xAHI
 -----END PGP SIGNATURE-----

Merge tag 'spi-v3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi

Pull spi updates from Mark Brown:
 "A quiet release for SPI, mainly driver updates and not too many of
  them:

   - Support for dummy transfers (for delays on startup) in drivers
     using transfer_one().
   - Lots of enhancements to the Designware driver to support new Intel
     SoCs.
   - Support for newer Renesas chips.
   - DMA support for the i.MX driver.
   - One new driver for Broadcom BCM53xx chips"

* tag 'spi-v3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi: (64 commits)
  spi: spi-mxs: fix a tiny typo in a comment
  spi: dw-mid: follow new DMAengine workflow
  spi: dw-mid: convert to use DMAengine wrappers
  spi: dw-mid: change magic numbers to the constants
  spi: orion: support armada extended baud rates
  spi: fsl: Sort include headers alphabetically
  spi: bcm53xx: Add missing module information
  spi: bcm53xx: Fix module dependency
  spi/rockchip: fix bug that cause the failure to read data in DMA mode
  spi: fsl-dspi: Remove probe info message
  spi: pl022: Add support for chip select extension
  spi: Fix possible ZERO_SIZE_PTR pointer dereferencing error.
  spi: dw: fix style of code in few places
  spi: dw: introduce support of loopback mode
  spi: dw-mid: terminate ongoing transfers at exit
  spi: dw-mid: respect 8 bit mode
  spi: clps711x: Migrate to the new clk subsystem
  spi: pl022: Add missing error check for devm_kzalloc
  spi: spi-imx: add DMA support
  spi: davinci: add support for adding delay between word's transmissions
  ...
2014-10-07 21:12:56 -04:00
Linus Torvalds 81e29b7d1b regulator: Updates for v3.18
This time around most of the changes are a lot of new drivers along with
 the standard set of fixes and cleanups (thanks again largely to Axel
 Lin).  We do have one nice new feature in the core which factors out the
 disappointingly tricky code around DT parsing, only a couple of drivers
 have been converted so far:
 
  - Factor out the code for parsing the standard bindings for a set of
    regulators out of DT, making the probe part of a lot of drivers
    simplier.
  - New drivers for Dialog DA9213, HiSilicon HI6420, Intersil ISL9305/H,
    Ricoh RN5T618, Rockchip RK808, Skyworks SKY81452, Silergy SYR82x, and
    Qualcomm RPM.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJUMpL1AAoJECTWi3JdVIfQeKMH/34XfyuVNShoxJCEU6zihqET
 2C7WdjMqN7vRISsNDo4amfbYxGxzkj+QqqwmHYPdodGz8t/Pa6p7tmzofWUic/S7
 Hhu3BqZ+9+oDTYCTJnGWqaASlfF0HxG50UQm/1c1eVbNnFUuGnQ59DrDJO/VhK+F
 aHT52Zy50iN0vnG/qWNBR/duSI4ZMy3cZJenGTcnJb47ugLeV8qXZZPnEnG7Kr3z
 4pwDbteCb6Y6AaEw2O+cyPOM90vQ+ANnGt74faLCLe+UZ5i0XK+3K5mRaw36ID+Y
 6WmY/8ROcLXD757kVLiXjAExH42/n7Wa9RFpD8ho2o2dsKEra/9Q+t18LMtklos=
 =CTFD
 -----END PGP SIGNATURE-----

Merge tag 'regulator-v3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator

Pull regulator updates from Mark Brown:
 "This time around most of the changes are a lot of new drivers along
  with the standard set of fixes and cleanups (thanks again largely to
  Axel Lin).  We do have one nice new feature in the core which factors
  out the disappointingly tricky code around DT parsing, only a couple
  of drivers have been converted so far:

   - Factor out the code for parsing the standard bindings for a set of
     regulators out of DT, making the probe part of a lot of drivers
     simplier.
   - New drivers for Dialog DA9213, HiSilicon HI6420, Intersil
     ISL9305/H, Ricoh RN5T618, Rockchip RK808, Skyworks SKY81452,
     Silergy SYR82x, and Qualcomm RPM"

* tag 'regulator-v3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator: (71 commits)
  regulator: da9211: Fix a bug in update of mask bit
  regulator: pwm-regulator: add devicetree bindings for pwm regulator
  regulator: pwm-regulator: get voltage and duty table from dts
  regulator: qcom_rpm: Fix FORCE_MODE_IS_2_BITS macro
  regulator: qcom_rpm: Don't explicitly initialise the first field of config
  regulator: ltc3589: fix broken voltage transitions
  regulator: qcom-rpm: Regulator driver for the Qualcomm RPM
  regulator: axp20x: Use parent device as regulator configuration device
  regulator: fan53555: Fix null pointer dereference
  regulator: fan53555: Fixup report wrong vendor message
  regulator: fan53555: fix wrong cast in probe
  regulator: fan53555: add support for Silergy SYR82x regulators
  regulator: fan53555: add devicetree support
  regulator: add devicetree bindings for Fairchild FAN53555 regulators
  regulator: rk808: Add function for ramp delay for buck1/buck2
  regulator: fan53555: use set_ramp_delay to set the ramp up slew rate
  regulator: fan53555: enable vin supply
  regulator: rk808: Fix missing of_node_put
  regulator: rk808: Remove unused variables
  regulator: of: Add stub OF match function for !OF case
  ...
2014-10-07 21:07:48 -04:00
Linus Torvalds 2b425a3f11 This is the bulk of pin control changes for the v3.18
development series:
 
 - New drivers for the Freescale i.MX21, Qualcomm APQ8084
   pin controllers.
 
 - Incremental new features on the Rockchip, atlas 6,
   OMAP, AM437x, APQ8064, prima2, AT91, Tegra, i.MX, Berlin
   and Nomadik.
 
 - Push Freescale drivers down into their own subdirectory.
 
 - Assorted sprays of syntax and semantic fixes.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUMkr3AAoJEEEQszewGV1z/xsP/1EFR6zyriyup3TuWw1kiqwH
 wGuEK3i0v99INcK46l9xD65aZLaK60Z6llYAmwa2wmFCDotOr46GSW2V9bqd0RHg
 6EZdDATo8Ge8j86L9oUMElbMJoKMQiVC6+YeiuhQRrFuq8TpXGoTMPeQlaEslR08
 MUjIVcxRbFeQCWOgkaqjjxwgX1FPU1S9aQNxDDDPuWSwTowV0nwBpdFviAgWlouY
 DDG4/WPWH7s/Ujv0MJ7MQR9Hkl6WYlcuhGWDUkcIj7f1lvxTTB37Em7daBUBwhQo
 PEYmf1AtwMEWF2y1i99ExFE/YSBQjjslYe29uECvaH63PVgfRMrWgJl199NOed8Q
 9GfEa+uwiV4Z4PxFZqcvsjUiNQg8SoijP4UTf9AJTuQZtebVia8OS9AFsN3XULHJ
 zXGKbCUd2kH+p/0/MJUePQEDoi9bPrsIhNG/s3KYmawQ6Ua4uytPgG0lF91dvP6m
 LvCnsGNDvGQUk1UUG3Lj4ZDCP42TAbjNyr27Ot/oUAygjHfjsXsZ6FFmlMCOeCRx
 tV+qjW9Ng69CSLPLKHCHVMsXKliJ2Vp2Mt8cr8yFyHaMDIneRx3IqUvrZ0dzfVLq
 /H3/7usvR/sEV23AI920mfPVYruIJESpBh6NKt66tPSSV2C6HP/qRTN/6tAwXqBL
 rzwv2t8qu5+ic2Ae5/wk
 =b4Tn
 -----END PGP SIGNATURE-----

Merge tag 'pinctrl-v3.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl

Pull pin control changes from Linus Walleij:
 "This is the bulk of pin control changes for the v3.18 development
  series:

   - New drivers for the Freescale i.MX21, Qualcomm APQ8084 pin
     controllers.

   - Incremental new features on the Rockchip, atlas 6, OMAP, AM437x,
     APQ8064, prima2, AT91, Tegra, i.MX, Berlin and Nomadik.

   - Push Freescale drivers down into their own subdirectory.

   - Assorted sprays of syntax and semantic fixes"

* tag 'pinctrl-v3.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl: (48 commits)
  pinctrl: specify bindings for pins and groups
  pinctrl: nomadik: improve GPIO debug prints
  pinctrl: abx500: refactor DT parser to take two paths
  pinctrl: abx500: use helpers for map allocation/free
  pinctrl: alter device tree bindings for functions
  pinctrl: nomadik: refactor DT parser to take two paths
  pinctrl: nomadik: use utils map free function
  pinctrl: nomadik: use util function to reserve maps
  pinctrl: qcom: use restart_notifier mechanism for ps_hold
  pinctrl: sh-pfc: sh73a0: Remove unnecessary SoC data allocation
  pinctrl: berlin: fix the dt_free_map function
  pinctrl: at91: disable PD or PU before enabling PU or PD
  pinctrl: st: remove gpiochip in failure cases
  pinctrl: at91: Fix error handling while doing gpiochio_irqchip_add
  pinctrl: at91: Fix failure path in at91_gpio_probe path
  pinctrl: lantiq: Release gpiochip resources in fail case
  pinctrl: imx: detect uninitialized pins
  pinctrl: tegra: Add MIPI pad control
  pinctrl: at91: Switch to using managed clk_get
  pinctrl: adi2: Remove duplicate gpiochip_remove_pin_ranges
  ...
2014-10-07 20:56:28 -04:00
Linus Torvalds d0cd84817c dmaengine-3.17
1/ Step down as dmaengine maintainer see commit 08223d80df "dmaengine
    maintainer update"
 
 2/ Removal of net_dma, as it has been marked 'broken' since 3.13 (commit
    7787380336 "net_dma: mark broken"), without reports of performance
    regression.
 
 3/ Miscellaneous fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUKDLKAAoJEB7SkWpmfYgC7wwP/iNHqRjf1suMUTBIF3P6Hgbe
 VCUwh0IkuujMPDG46WRn6cYzarRxVPLoGaLHLPszgjI6pmGPVv19wqeDOlUxtcmr
 0iQWEWv/zqseaAIW+4gj/WYCyMgKil49EUBJKCZCfNmIaad+e0pr8f0uE5yOkHPM
 tqWoZERu9A4dlXGr1TjeOZVzdnPrCt92MrLDN6ZZ6tMuJaEc5PauaLxKTeGy5fYj
 UB+k1xJQzECbsYfpB+uCVYl5/qPO1rNyuBYS8THCsW+JYmrbbfH2kkF2lo2FaUpO
 8Yd50FtzXHKWwAt7BzfIwU2M7x0wRmryrC/xsQi6M+WmVeHYvvHUIpzaA66xRZ5x
 fCy3Fu8sEnmnmboAbh2v2c5uTycqRl2xPzbpLAuxglloXIxzi3ckp6ESF/Z4SldH
 oxIoEievN7lah3vKgvlHZYcWDzrYr8EKf/EzFe9RqDBQDKtzDzre1H9Uivr387Vm
 uFUcGHYG/GXuX47C7EUsMtaSW2UEoR2ytw/HR6CKFPTVXwAzEO6kA9vg0EqL0iIq
 2wVLgavlZuwegmaUBgnr+bgVZMvVN7OU7fAIRVe5xNO6itrPKvheSlQthmRiiq9C
 uzOu4PS6PexqzHUNPCcJpCsj+lawmCSrE0bxtPzTA/CQInVgWs219V9+W5Gn/0YA
 EARN9k6ueX9PZPQrPQLm
 =BBBv
 -----END PGP SIGNATURE-----

Merge tag 'dmaengine-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine

Pull dmaengine updates from Dan Williams:
 "Even though this has fixes marked for -stable, given the size and the
  needed conflict resolutions this is 3.18-rc1/merge-window material.

  These patches have been languishing in my tree for a long while.  The
  fact that I do not have the time to do proper/prompt maintenance of
  this tree is a primary factor in the decision to step down as
  dmaengine maintainer.  That and the fact that the bulk of drivers/dma/
  activity is going through Vinod these days.

  The net_dma removal has not been in -next.  It has developed simple
  conflicts against mainline and net-next (for-3.18).

  Continuing thanks to Vinod for staying on top of drivers/dma/.

  Summary:

   1/ Step down as dmaengine maintainer see commit 08223d80df
      "dmaengine maintainer update"

   2/ Removal of net_dma, as it has been marked 'broken' since 3.13
      (commit 7787380336 "net_dma: mark broken"), without reports of
      performance regression.

   3/ Miscellaneous fixes"

* tag 'dmaengine-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine:
  net: make tcp_cleanup_rbuf private
  net_dma: revert 'copied_early'
  net_dma: simple removal
  dmaengine maintainer update
  dmatest: prevent memory leakage on error path in thread
  ioat: Use time_before_jiffies()
  dmaengine: fix xor sources continuation
  dma: mv_xor: Rename __mv_xor_slot_cleanup() to mv_xor_slot_cleanup()
  dma: mv_xor: Remove all callers of mv_xor_slot_cleanup()
  dma: mv_xor: Remove unneeded mv_xor_clean_completed_slots() call
  ioat: Use pci_enable_msix_exact() instead of pci_enable_msix()
  drivers: dma: Include appropriate header file in dca.c
  drivers: dma: Mark functions as static in dma_v3.c
  dma: mv_xor: Add DMA API error checks
  ioat/dca: Use dev_is_pci() to check whether it is pci device
2014-10-07 20:39:25 -04:00
Linus Torvalds bdf428feb2 Nothing major: support for compressing modules, and auto-tainting params.
Cheers,
 Rusty.
 PS.  My virtio-next tree is empty: DaveM took the patches I had.  There might
      be a virtio-rng starvation fix, but so far it's a bit voodoo so I will
      get to that in the next two days or it will wait.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUGFrvAAoJENkgDmzRrbjxOJYQALaZbTumrtX3Mo/FAtzn8d5N
 8gxcqk1Mhz4lR1vPWy/YN/H2f23qb/saqLxPar8Wgou3h7N8EqSdwDqJSuvEqhG0
 iEXUsNLC7BOsDkLYhdjTfZoW/lsVU/EH4bkZMSxAZI9V64phXhDYfPb5SQgJTECr
 Ue6IK4ijW6zdWLstGfg/ixrIeGDUSnyiThF9O2mYVaB1D0QkLDIAZxbjZJgfFfut
 PwO33/sEV4pceTpkmxFKl/OiS+obi/VbDixjSCcO+jaBd1pVxH9fhhKREStOhN4z
 88z5ADR71RH6so9TQTwIIcgb2Hon5d+3RVMB6CxuvKs9NmHSXDiQyZvG9J/jiSdm
 KrPKSiVwGGwJSwxXTm8CDaz6Oj0ibDXBIzv/vYI22sR7u8PmRQFvL3O1VrW+KDnE
 yoG75S9DHzSQ1183xFFFTt4FBRm/4XKyVs+F6YqYkchLigrUfQMCGb1cmZyE5y7K
 bgNyonu0m/ItoQmekoDgYqvSjwdguaJ35XCW55GrKJ84JDHBaw3SpPdEfjAS8FsH
 aT5o2oernvwRG6gsX9858RvB/uo1UKwHv1waDfV4cqNjMm5Ko+Yr6OIdQvBQiq07
 cFkVmkrMtEyX19QyIGW3QSbFL1lr3X5cC5glzEeKY941yZbTluSsNuMlMPT1+IMx
 NOUbh0aG8B8ZaMZPFNLi
 =QzCn
 -----END PGP SIGNATURE-----

Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull module update from Rusty Russell:
 "Nothing major: support for compressing modules, and auto-tainting
  params.

  PS. My virtio-next tree is empty: DaveM took the patches I had.  There
      might be a virtio-rng starvation fix, but so far it's a bit voodoo
      so I will get to that in the next two days or it will wait"

* tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux:
  moduleparam: Resolve missing-field-initializer warning
  kbuild: handle module compression while running 'make modules_install'.
  modinst: wrap long lines in order to enhance cmd_modules_install
  modsign: lookup lines ending in .ko in .mod files
  modpost: simplify file name generation of *.mod.c files
  modpost: reduce visibility of symbols and constify r/o arrays
  param: check for tainting before calling set op.
  drm/i915: taint the kernel if unsafe module parameters are set
  module: add module_param_unsafe and module_param_named_unsafe
  module: make it possible to have unsafe, tainting module params
  module: rename KERNEL_PARAM_FL_NOARG to avoid confusion
2014-10-07 20:17:38 -04:00
Jeff Layton 1b2b32dcdb locks: fix fcntl_setlease/getlease return when !CONFIG_FILE_LOCKING
Currently they both just return 0. Fix them to return more appropriate
values instead.

For better or worse, most places in the kernel return -EINVAL when
leases aren't available. -ENOLCK would probably have been better, but
let's follow suit here in the case of F_SETLEASE.

In the F_GETLEASE case, just return F_UNLCK since we know that no
lease will have been set.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2014-10-07 14:06:13 -04:00
Jeff Layton 7ca76311fe locks: set fl_owner for leases to filp instead of current->files
Like flock locks, leases are owned by the file description. Now that the
i_have_this_lease check in __break_lease is gone, we don't actually use
the fl_owner for leases for anything. So, it's now safe to set this more
appropriately to the same value as the fl_file.

While we're at it, fix up the comments over the fl_owner_t definition
since they're rather out of date.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
2014-10-07 14:06:13 -04:00
Jeff Layton 4d01b7f5e7 locks: give lm_break a return value
Christoph suggests:

   "Add a return value to lm_break so that the lock manager can tell the
    core code "you can delete this lease right now".  That gets rid of
    the games with the timeout which require all kinds of race avoidance
    code in the users."

Do that here and have the nfsd lease break routine use it when it detects
that there was a race between setting up the lease and it being broken.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2014-10-07 14:06:13 -04:00
Jeff Layton c45198eda2 locks: move freeing of leases outside of i_lock
There was only one place where we still could free a file_lock while
holding the i_lock -- lease_modify. Add a new list_head argument to the
lm_change operation, pass in a private list when calling it, and fix
those callers to dispose of the list once the lock has been dropped.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2014-10-07 14:06:13 -04:00
Jeff Layton 1c7dd2ff43 locks: define a lm_setup handler for leases
...and move the fasync setup into it for fcntl lease calls. At the same
time, change the semantics of how the file_lock double-pointer is
handled. Up until now, on a successful lease return you got a pointer to
the lock on the list. This is bad, since that pointer can no longer be
relied on as valid once the inode->i_lock has been released.

Change the code to instead just zero out the pointer if the lease we
passed in ended up being used. Then the callers can just check to see
if it's NULL after the call and free it if it isn't.

The priv argument has the same semantics. The lm_setup function can
zero the pointer out to signal to the caller that it should not be
freed after the function returns.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2014-10-07 14:06:12 -04:00
Jeff Layton e6f5c78930 locks: plumb a "priv" pointer into the setlease routines
In later patches, we're going to add a new lock_manager_operation to
finish setting up the lease while still holding the i_lock.  To do
this, we'll need to pass a little bit of info in the fcntl setlease
case (primarily an fasync structure). Plumb the extra pointer into
there in advance of that.

We declare this pointer as a void ** to make it clear that this is
private info, and that the caller isn't required to set this unless
the lm_setup specifically requires it.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2014-10-07 14:06:12 -04:00
Eric Dumazet 0287587884 net: better IFF_XMIT_DST_RELEASE support
Testing xmit_more support with netperf and connected UDP sockets,
I found strange dst refcount false sharing.

Current handling of IFF_XMIT_DST_RELEASE is not optimal.

Dropping dst in validate_xmit_skb() is certainly too late in case
packet was queued by cpu X but dequeued by cpu Y

The logical point to take care of drop/force is in __dev_queue_xmit()
before even taking qdisc lock.

As Julian Anastasov pointed out, need for skb_dst() might come from some
packet schedulers or classifiers.

This patch adds new helper to cleanly express needs of various drivers
or qdiscs/classifiers.

Drivers that need skb_dst() in their ndo_start_xmit() should call
following helper in their setup instead of the prior :

	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
->
	netif_keep_dst(dev);

Instead of using a single bit, we use two bits, one being
eventually rebuilt in bonding/team drivers.

The other one, is permanent and blocks IFF_XMIT_DST_RELEASE being
rebuilt in bonding/team. Eventually, we could add something
smarter later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-07 13:22:11 -04:00
Petri Gynther fd2ef0ba30 net: phy: adjust fixed_phy_register() return value
Adjust fixed_phy_register() to return struct phy_device *, so that
it becomes easy to use fixed PHYs without device tree support:

  phydev = fixed_phy_register(PHY_POLL, &fixed_phy_status, NULL);
  fixed_phy_set_link_update(phydev, fixed_phy_link_update);
  phy_connect_direct(netdev, phydev, handler_fn, phy_interface);

This change is a prerequisite for modifying bcmgenet driver to work
without a device tree on Broadcom's MIPS-based 7xxx platforms.

Signed-off-by: Petri Gynther <pgynther@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-07 13:06:45 -04:00
Rafael J. Wysocki 4734c6efc8 Merge branch 'pm-avs'
* pm-avs:
  MAINTAINERS: update entry for drivers/power/avs
  PM / AVS: rockchip-io: add driver handling Rockchip io domains
  regulator: core: Add REGULATOR_EVENT_PRE_VOLTAGE_CHANGE (and ABORT)
2014-10-07 01:18:38 +02:00
Rafael J. Wysocki 0f4685d0ec Merge branch 'pm-cpufreq'
* pm-cpufreq:
  cpufreq: cpufreq-dt: fix potential double put of cpu OF node
  cpufreq: cpu0: rename driver and internals to 'cpufreq_dt'
  cpufreq: ppc-corenet: remove duplicate update of cpu_data
  cpufreq: Replace strnicmp with strncasecmp
  cpufreq: powernv: Set the cpus to nominal frequency during reboot/kexec
  cpufreq: powernv: Set the pstate of the last hotplugged out cpu in policy->cpus to minimum
  cpufreq: Allow stop CPU callback to be used by all cpufreq drivers
  cpufreq: cpu0: Make allocate_resources() work for any CPU
  cpufreq: cpu0: try regulators with name "cpu-supply"
  cpufreq: cpu0: Move per-cluster initialization code to ->init()
  cpufreq: cpu0: use dev_{err|warn|dbg} instead of pr_{err|warn|debug}
  cpufreq: cpu0: print relevant error when we defer probe
  cpufreq: cpu0: don't validate clock on clk_put()
  cpufreq: cpu0: Update Module Author
  cpufreq: Add support for per-policy driver data
2014-10-07 01:18:30 +02:00
Rafael J. Wysocki 49a09c9ab0 Merge branch 'pm-domains'
* pm-domains: (32 commits)
  PM / Domains: Rename cpu_data to cpuidle_data
  PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h
  PM / Domains: Remove legacy API for adding devices through DT
  PM / Domains: Add genpd attach/detach callbacks
  PM / Domains: add debugfs listing of struct generic_pm_domain-s
  ACPI / PM: Convert acpi_dev_pm_detach() into a static function
  ARM: exynos: Move to generic PM domain DT bindings
  amba: Add support for attach/detach of PM domains
  spi: core: Convert to dev_pm_domain_attach|detach()
  mmc: sdio: Convert to dev_pm_domain_attach|detach()
  i2c: core: Convert to dev_pm_domain_attach|detach()
  drivercore / platform: Convert to dev_pm_domain_attach|detach()
  PM / Domains: Add APIs to attach/detach a PM domain for a device
  PM / Domains: Add generic OF-based PM domain look-up
  ACPI / PM: Assign the ->detach() callback when attaching the PM domain
  PM / Domains: Add a detach callback to the struct dev_pm_domain
  PM / domains: Spelling s/domian/domain/
  PM / domains: Keep declaration of dev_power_governors together
  PM / domains: Remove default_stop_ok() API
  drivers: sh: Leave disabling of unused PM domains to genpd
  ...
2014-10-07 01:18:12 +02:00
Eric Dumazet fcbeb976d7 net: introduce netdevice gso_min_segs attribute
Some TSO engines might have a too heavy setup cost, that impacts
performance on hosts sending small bursts (2 MSS per packet).

This patch adds a device gso_min_segs, allowing drivers to set
a minimum segment size for TSO packets, according to the NIC
performance.

Tested on a mlx4 NIC, this allows to get a ~110% increase of
throughput when sending 2 MSS per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-06 17:56:28 -04:00
Jiri Kosina ee5db7e47f Merge branches 'for-3.18/always-poll-quirk', 'for-3.18/logitech', 'for-3.18/picolcd', 'for-3.18/rmi', 'for-3.18/sony', 'for-3.18/uhid', 'for-3.18/upstream' and 'for-3.18/wacom' into for-linus 2014-10-06 23:34:40 +02:00
Andrew Bresticker a6551a76ff mfd: cros_ec: stop calling ->cmd_xfer() directly
Instead of having users of the ChromeOS EC call the interface-specific
cmd_xfer() callback directly, introduce a central cros_ec_cmd_xfer()
to use instead.  This will allow us to put all the locking and retry
logic in one place instead of duplicating it across the different
drivers.

Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
2014-10-06 21:29:07 +01:00
Eric Dumazet 7dfa4b414d net/mlx4_en: Code cleanups in tx path
- Remove unused variable ring->poll_cnt
- No need to set some fields if using blueflame
- Add missing const's
- Use unlikely
- Remove unneeded new line
- Make some comments more precise
- struct mlx4_bf @offset field reduced to unsigned int to save space

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-06 01:04:15 -04:00
David S. Miller a4b4a2b7f9 Merge tag 'master-2014-10-02' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next
John W. Linville says:

====================
pull request: wireless-next 2014-10-03

Please pull tihs batch of updates intended for the 3.18 stream!

For the iwlwifi bits, Emmanuel says:

"I have here a few things that depend on the latest mac80211's changes:
RRM, TPC, Quiet Period etc...  Eyal keeps improving our rate control
and we have a new device ID. This last patch should probably have
gone to wireless.git, but at that stage, I preferred to send it to
-next and CC stable."

For (most of) the Atheros bits, Kalle says:

"The only new feature is testmode support from me. Ben added a new method
to crash the firmware with an assert for debug purposes. As usual, we
have lots of smaller fixes from Michal. Matteo fixed a Kconfig
dependency with debugfs. I fixed some warnings recently added to
checkpatch."

For the NFC bits, Samuel says:

"We've had major updates for TI and ST Microelectronics drivers, and a
few NCI related changes.

For TI's trf7970a driver:

- Target mode support for trf7970a
- Suspend/resume support for trf7970a
- DT properties additions to handle different quirks
- A bunch of fixes for smartphone IOP related issues

For ST Microelectronics' ST21NFCA and ST21NFCB drivers:

- ISO15693 support for st21nfcb
- checkpatch and sparse related warning fixes
- Code cleanups and a few minor fixes

Finally, Marvell added ISO15693 support to the NCI stack, together with a
couple of NCI fixes."

For the Bluetooth bits, Johan says:

"This 3.18 pull request replaces the one I did on Monday ("bluetooth-next
2014-09-22", which hasn't been pulled yet). The additions since the last
request are:

 - SCO connection fix for devices not supporting eSCO
 - Cleanups regarding the SCO establishment logic
 - Remove unnecessary return value from logging functions
 - Header compression fix for 6lowpan
 - Cleanups to the ieee802154/mrf24j40 driver

Here's a copy from previous request that this one replaces:

'
Here are some more patches for 3.18. They include various fixes to the
btusb HCI driver, a fix for LE SMP, as well as adding Jukka to the
MAINTAINERS file for generic 6LoWPAN (as requested by Alexander Aring).

I've held on to this pull request a bit since we were waiting for a SCO
related fix to get sorted out first. However, since the merge window is
getting closer I decided not to wait for it. If we do get the fix sorted
out there'll probably be a second small pull request later this week.
'"

And,

"Unless 3.17 gets delayed this will probably be our last -next pull request for
3.18. We've got:

  - New Marvell hardware supportr
  - Multicast support for 6lowpan
  - Several of 6lowpan fixes & cleanups
  - Fix for a (false-positive) lockdep warning in L2CAP
  - Minor btusb cleanup"

On top of all that comes the usual sort of updates to ath5k, ath9k,
ath10k, brcmfmac, mwifiex, and wil6210.  This time around there are
also a number of rtlwifi updates to enable some new hardware and
to reconcile the in-kernel drivers with some newer releases of the
Realtek vendor drivers.  Also of note is some device tree work for
the bcma bus.

Please let me know if there are problems!
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-05 21:34:39 -04:00
Vijay Subramanian c8753d55af net: Cleanup skb cloning by adding SKB_FCLONE_FREE
SKB_FCLONE_UNAVAILABLE has overloaded meaning depending on type of skb.
1: If skb is allocated from head_cache, it indicates fclone is not available.
2: If skb is a companion fclone skb (allocated from fclone_cache), it indicates
it is available to be used.

To avoid confusion for case 2 above, this patch  replaces
SKB_FCLONE_UNAVAILABLE with SKB_FCLONE_FREE where appropriate. For fclone
companion skbs, this indicates it is free for use.

SKB_FCLONE_UNAVAILABLE will now simply indicate skb is from head_cache and
cannot / will not have a companion fclone.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-04 20:34:25 -04:00
Pantelis Antoniou 7941b27b16 of: Introduce Device Tree resolve support.
Introduce support for dynamic device tree resolution.
Using it, it is possible to prepare a device tree that's
been loaded on runtime to be modified and inserted at the kernel
live tree.

Export of of_resolve and bug fix of double free by
	Guenter Roeck <groeck@juniper.net>

Signed-off-by: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
[grant.likely: Don't need to select CONFIG_OF_DYNAMIC and CONFIG_OF_DEVICE]
[grant.likely: Don't need to depend on OF or !SPARC]
[grant.likely: Factor out duplicate code blocks into single function]
Signed-off-by: Grant Likely <grant.likely@linaro.org>
2014-10-04 21:24:26 +01:00
Tom Herbert efc98d08e1 fou: eliminate IPv4,v6 specific GRO functions
This patch removes fou[46]_gro_receive and fou[46]_gro_complete
functions. The v4 or v6 variants were chosen for the UDP offloads
based on the address family of the socket this is not necessary
or correct. Alternatively, this patch adds is_ipv6 to napi_gro_skb.
This is set in udp6_gro_receive and unset in udp4_gro_receive. In
fou_gro_receive the value is used to select the correct inet_offloads
for the protocol of the outer IP header.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 16:53:32 -07:00
Michael Ellerman 75d43b2d0a Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git
Freescale updates from Scott (27 commits):

  "Highlights include DMA32 zone support (SATA, USB, etc now works on 64-bit
   FSL kernels), MSI changes, 8xx optimizations and cleanup, t104x board
   support, and PrPMC PCI enumeration."
2014-10-04 08:59:06 +10:00
Eli Cohen 5903325a64 net/mlx5_core: Identify resources by their type
This patch puts a common part as the first field of mlx5_core_qp. This field is
used to identify which resource generated an event. This is required since upcoming
new resource types such as DC targets are allocated for the same numerical space
as regular QPs and may generate the same events. By searching the resource in the
same table we can then look at the common field to identify the resource.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 15:42:32 -07:00
Eli Cohen b775516b04 net/mlx5_core: use set/get macros in device caps
Transform device capabilities related commands to use set/get macros to
manipulate command mailboxes.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 15:42:32 -07:00
Eli Cohen d29b796ada net/mlx5_core: Use hardware registers description header file
Add an auto generated header file that describes hardware registers along with
set of macros that set/get values. The macros do static checks to avoid
overflow, handle endianess, and overall provide a clean way to code commands.
Currently the header file is small and we will add structs as we make use of
the macros.
A few commands were removed from the commands enum since they are not supported
currently and will be added when support is available.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 15:42:31 -07:00
Eli Cohen c7a08ac7ee net/mlx5_core: Update device capabilities handling
Rearrange struct mlx5_caps so it has a "gen" field to represent the current
capabilities configured for the device. Max capabilities can also be queried
from the device. Also update capabilities struct to contain more fields as per
the latest revision if firmware specification.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 15:42:31 -07:00
Eric Dumazet 55a93b3ea7 qdisc: validate skb without holding lock
Validation of skb can be pretty expensive :

GSO segmentation and/or checksum computations.

We can do this without holding qdisc lock, so that other cpus
can queue additional packets.

Trick is that requeued packets were already validated, so we carry
a boolean so that sch_direct_xmit() can validate a fresh skb list,
or directly use an old one.

Tested on 40Gb NIC (8 TX queues) and 200 concurrent flows, 48 threads
host.

Turning TSO on or off had no effect on throughput, only few more cpu
cycles. Lock contention on qdisc lock disappeared.

Same if disabling TX checksum offload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-03 15:36:11 -07:00
Joe Perches 906d201530 dynamic_debug: change __dynamic_<foo>_dbg return types to void
The return value is not used by callers of these functions
so change the functions to return void.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-10-03 14:55:48 -07:00
Junichi Nomura d8f429e166 block: add bioset_create_nobvec()
Users of bio_clone_fast() do not want bios with their own bvecs.
Allocating a bvec mempool as part of the bioset intended for such users
is a waste of memory.

bioset_create_nobvec() creates a bioset that doesn't have the bvec
mempool.

Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-10-03 15:28:18 -06:00
Matt Fleming 75b128573b Merge branch 'next' into efi-next-merge
Conflicts:
	arch/x86/boot/compressed/eboot.c
2014-10-03 22:15:56 +01:00
Matt Fleming 6d80dba1c9 efi: Provide a non-blocking SetVariable() operation
There are some circumstances that call for trying to write an EFI
variable in a non-blocking way. One such scenario is when writing pstore
data in efi_pstore_write() via the pstore_dump() kdump callback.

Now that we have an EFI runtime spinlock we need a way of aborting if
there is contention instead of spinning, since when writing pstore data
from the kdump callback, the runtime lock may already be held by the CPU
that's running the callback if we crashed in the middle of an EFI
variable operation.

The situation is sufficiently special that a new EFI variable operation
is warranted.

Introduce ->set_variable_nonblocking() for this use case. It is an
optional EFI backend operation, and need only be implemented by those
backends that usually acquire locks to serialize access to EFI
variables, as is the case for virt_efi_set_variable() where we now grab
the EFI runtime spinlock.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:41:03 +01:00
Laszlo Ersek 98d2a6ca14 efi: Introduce efi_md_typeattr_format()
At the moment, there are three architectures debug-printing the EFI memory
map at initialization: x86, ia64, and arm64. They all use different format
strings, plus the EFI memory type and the EFI memory attributes are
similarly hard to decode for a human reader.

Introduce a helper __init function that formats the memory type and the
memory attributes in a unified way, to a user-provided character buffer.

The array "memory_type_name" is copied from the arm64 code, temporarily
duplicating it. The (otherwise optional) braces around each string literal
in the initializer list are dropped in order to match the kernel coding
style more closely. The element size is tightened from 32 to 20 bytes
(maximum actual string length + 1) so that we can derive the field width
from the element size.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[ Dropped useless 'register' keyword, which compiler will ignore ]
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:41:00 +01:00
Laszlo Ersek 9c97e0bdd4 efi: Add macro for EFI_MEMORY_UCE memory attribute
Add the following macro from the UEFI spec, for completeness:

  EFI_MEMORY_UCE  Memory cacheability attribute: The memory region
                  supports being configured as not cacheable, exported,
                  and supports the "fetch and add" semaphore mechanism.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:40:59 +01:00
Dave Young 6ccc72b87b lib: Add a generic cmdline parse function parse_option_str
There should be a generic function to parse params like a=b,c
Adding parse_option_str in lib/cmdline.c which will return true
if there's specified option set in the params.

Also updated efi=old_map parsing code to use the new function

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:40:58 +01:00
Dave Young b2e0a54a12 efi: Move noefi early param code out of x86 arch code
noefi param can be used for arches other than X86 later, thus move it
out of x86 platform code.

Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:40:58 +01:00
Matt Fleming 5a17dae422 efi: Add efi= parameter parsing to the EFI boot stub
We need a way to customize the behaviour of the EFI boot stub, in
particular, we need a way to disable the "chunking" workaround, used
when reading files from the EFI System Partition.

One of my machines doesn't cope well when reading files in 1MB chunks to
a buffer above the 4GB mark - it appears that the "chunking" bug
workaround triggers another firmware bug. This was only discovered with
commit 4bf7111f50 ("x86/efi: Support initrd loaded above 4G"), and
that commit is perfectly valid. The symptom I observed was a corrupt
initrd rather than any kind of crash.

efi= is now used to specify EFI parameters in two very different
execution environments, the EFI boot stub and during kernel boot.

There is also a slight performance optimization by enabling efi=nochunk,
but that's offset by the fact that you're more likely to run into
firmware issues, at least on x86. This is the rationale behind leaving
the workaround enabled by default.

Also provide some documentation for EFI_READ_CHUNK_SIZE and why we're
using the current value of 1MB.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Roy Franz <roy.franz@linaro.org>
Cc: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Cc: Leif Lindholm <leif.lindholm@linaro.org>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
2014-10-03 18:40:57 +01:00
Mark Brown bab4d751f7 Merge remote-tracking branches 'spi/topic/pl022', 'spi/topic/pxa2xx', 'spi/topic/rspi', 'spi/topic/sh-msiof' and 'spi/topic/sirf' into spi-next 2014-10-03 16:33:42 +01:00
Ulf Hansson f39cb1797e PM / Domains: Rename cpu_data to cpuidle_data
The "cpu_data" are defined for some archs and thus conflicting with the
"cpu_data" member in the struct gpd_cpu_data. This causes a compiler
error for those archs.

Let's fix it by rename the member to cpuidle_data. In this context it
also seems appropriate to rename the struct to gpd_cpuidle_data to
better reflect its use.

Fixes: f48c767ce8 (PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-10-03 15:43:14 +02:00
Rafael J. Wysocki 6f1293ff74 Merge back cpufreq material for v3.18. 2014-10-03 15:41:16 +02:00
Adrian Hunter 6154139794 mmc: sdhci: Add quirk for always getting TC with stop cmd
Add a quirk for a host controller that always sets
a Transfer Complete interrupt status for the stop
command even when a busy response is not indicated.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2014-10-03 14:18:17 +02:00
Andy Grover f14bb039a4 uio: Export definition of struct uio_device
In order to prevent a O(n) search of the filesystem to link up its uio
node with its target configuration, TCMU needs to know the minor number
that UIO assigned. Expose the definition of this struct so TCMU can
access this field.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
2014-10-02 21:35:54 -07:00
Peter Zijlstra 8acd91e862 locking/lockdep: Revert qrwlock recusive stuff
Commit f0bab73cb5 ("locking/lockdep: Restrict the use of recursive
read_lock() with qrwlock") changed lockdep to try and conform to the
qrwlock semantics which differ from the traditional rwlock semantics.

In particular qrwlock is fair outside of interrupt context, but in
interrupt context readers will ignore all fairness.

The problem modeling this is that read and write side have different
lock state (interrupts) semantics but we only have a single
representation of these. Therefore lockdep will get confused, thinking
the lock can cause interrupt lock inversions.

So revert it for now; the old rwlock semantics were already imperfectly
modeled and the qrwlock extra won't fit either.

If we want to properly fix this, I think we need to resurrect the work
by Gautham did a few years ago that split the read and write state of
locks:

   http://lwn.net/Articles/332801/

FWIW the locking selftest that would've failed (and was reported by
Borislav earlier) is something like:

  RL(X1);	/* IRQ-ON */
  LOCK(A);
  UNLOCK(A);
  RU(X1);

  IRQ_ENTER();
  RL(X1);	/* IN-IRQ */
  RU(X1);
  IRQ_EXIT();

At which point it would report that because A is an IRQ-unsafe lock we
can suffer the following inversion:

	CPU0		CPU1

	lock(A)
			lock(X1)
			lock(A)
	<IRQ>
	 lock(X1)

And this is 'wrong' because X1 can recurse (assuming the above lock are
in fact read-lock) but lockdep doesn't know about this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: ego@linux.vnet.ibm.com
Cc: bp@alien8.de
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20140930132600.GA7444@worktop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-10-03 06:09:30 +02:00
David S. Miller 739e4a758e Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
	drivers/net/usb/r8152.c
	net/netfilter/nfnetlink.c

Both r8152 and nfnetlink conflicts were simple overlapping changes.

Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-02 11:25:43 -07:00
Joerg Roedel 09b5269a1b Merge branches 'arm/exynos', 'arm/omap', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next
Conflicts:
	drivers/iommu/arm-smmu.c
2014-10-02 12:24:45 +02:00
Joerg Roedel 57384592c4 iommu/vt-d: Store bus information in RMRR PCI device path
This will be used later to match broken RMRR entries.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
2014-10-02 12:12:25 +02:00
Linus Walleij 89168b4899 mmc: core: restore detect line inversion semantics
commit 98e90de99a
"mmc: host: switch OF parser to use gpio descriptors"
switched the semantic behaviour of card detect and read
only flags such that the inversion capability flag would
only be set if inversion was explicitly specified in the
device tree, in the hopes that no-one was using double
inversion.

It turns out that the XOR:ing between the explicit
inversion was indeed in use, so we need to restore the
old semantics where both ways of inversion are checked
and the end result XOR:ed.

Reported-by: Javier Martinez Canillas <javier@dowhile0.org>
Tested-by: Javier Martinez Canillas <javier@dowhile0.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2014-10-02 11:51:26 +02:00
Joerg Roedel 599bad38cf driver core: Add BUS_NOTIFY_REMOVED_DEVICE event
This event closes an important gap in the bus notifiers.
There is already the BUS_NOTIFY_DEL_DEVICE event, but that
is sent when the device is still bound to its device driver.

This is too early for the IOMMU code to destroy any mappings
for the device, as they might still be in use by the driver.

The new BUS_NOTIFY_REMOVED_DEVICE event introduced with this
patch closes this gap as it is sent when the device is
already unbound from its device driver and almost completly
removed from the driver core.

With this event the IOMMU code can safely destroy any
mappings and other data structures when a device is removed.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Jerry Hoemann <jerry.hoemann@hp.com>
2014-10-02 11:14:34 +02:00
Herbert Xu 9561dccb45 Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Merging the crypto tree for 3.17 to pull in the "by8" AVX CTR revert.
2014-10-02 14:37:20 +08:00
Petri Gynther d068b02cfd net: phy: add BCM7425 and BCM7429 PHYs
Signed-off-by: Petri Gynther <pgynther@google.com>
Acked-by: Florian Fainelli <f.fainelli@gmai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-01 22:12:48 -04:00
Jan Kara 90a8020278 vfs: fix data corruption when blocksize < pagesize for mmaped data
->page_mkwrite() is used by filesystems to allocate blocks under a page
which is becoming writeably mmapped in some process' address space. This
allows a filesystem to return a page fault if there is not enough space
available, user exceeds quota or similar problem happens, rather than
silently discarding data later when writepage is called.

However VFS fails to call ->page_mkwrite() in all the cases where
filesystems need it when blocksize < pagesize. For example when
blocksize = 1024, pagesize = 4096 the following is problematic:
  ftruncate(fd, 0);
  pwrite(fd, buf, 1024, 0);
  map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0);
  map[0] = 'a';       ----> page_mkwrite() for index 0 is called
  ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */
  mremap(map, 1024, 10000, 0);
  map[4095] = 'a';    ----> no page_mkwrite() called

At the moment ->page_mkwrite() is called, filesystem can allocate only
one block for the page because i_size == 1024. Otherwise it would create
blocks beyond i_size which is generally undesirable. But later at
->writepage() time, we also need to store data at offset 4095 but we
don't have block allocated for it.

This patch introduces a helper function filesystems can use to have
->page_mkwrite() called at all the necessary moments.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
2014-10-01 21:49:18 -04:00
Tom Herbert 8bce6d7d0d udp: Generalize skb_udp_segment
skb_udp_segment is the function called from udp4_ufo_fragment to
segment a UDP tunnel packet. This function currently assumes
segmentation is transparent Ethernet bridging (i.e. VXLAN
encapsulation). This patch generalizes the function to
operate on either Ethertype or IP protocol.

The inner_protocol field must be set to the protocol of the inner
header. This can now be either an Ethertype or an IP protocol
(in a union). A new flag in the skbuff indicates which type is
effective. skb_set_inner_protocol and skb_set_inner_ipproto
helper functions were added to set the inner_protocol. These
functions are called from the point where the tunnel encapsulation
is occuring.

When skb_udp_tunnel_segment is called, the function to segment the
inner packet is selected based on the inner IP or Ethertype. In the
case of an IP protocol encapsulation, the function is derived from
inet[6]_offloads. In the case of Ethertype, skb->protocol is
set to the inner_protocol and skb_mac_gso_segment is called. (GRE
currently does this, but it might be possible to lookup the protocol
in offload_base and call the appropriate segmenation function
directly).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-01 21:35:51 -04:00
Eric Dumazet d0bf4a9e92 net: cleanup and document skb fclone layout
Lets use a proper structure to clearly document and implement
skb fast clones.

Then, we might experiment more easily alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking of implementation details.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-01 16:34:25 -04:00
Bjorn Helgaas 359c660e99 Merge branch 'pci/msi' into next
* pci/msi:
  PCI/MSI: Remove unnecessary temporary variable
  PCI/MSI: Use __write_msi_msg() instead of write_msi_msg()
  MSI/powerpc: Use __read_msi_msg() instead of read_msi_msg()
  PCI/MSI: Use __get_cached_msi_msg() instead of get_cached_msi_msg()
  PCI/MSI: Add "msi_bus" sysfs MSI/MSI-X control for endpoints
  PCI/MSI: Remove "pos" from the struct msi_desc msi_attrib
  PCI/MSI: Remove unused kobject from struct msi_desc
  PCI/MSI: Rename pci_msi_check_device() to pci_msi_supported()
  PCI/MSI: Move D0 check into pci_msi_check_device()
  PCI/MSI: Remove arch_msi_check_device()
  irqchip: armada-370-xp: Remove arch_msi_check_device()
  PCI/MSI/PPC: Remove arch_msi_check_device()

Conflicts:
	drivers/pci/host/pcie-designware.c
2014-10-01 12:31:46 -06:00
Bjorn Helgaas 07a7cbd3b8 Merge branch 'pci/host-generic' into next
* pci/host-generic:
  arm64: Add architectural support for PCI
  PCI: Add pci_remap_iospace() to map bus I/O resources
  of/pci: Add support for parsing PCI host bridge resources from DT
  of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()
  PCI: Add generic domain handling
  of/pci: Fix the conversion of IO ranges into IO resources
  of/pci: Move of_pci_range_to_resource() to of/address.c
  ARM: Define PCI_IOBASE as the base of virtual PCI IO space
  of/pci: Add pci_register_io_range() and pci_pio_to_address()
  asm-generic/io.h: Fix ioport_map() for !CONFIG_GENERIC_IOMAP

Conflicts:
	drivers/pci/host/pci-tegra.c
2014-10-01 12:31:23 -06:00
Yijing Wang 48c3c38f00 PCI/MSI: Remove "pos" from the struct msi_desc msi_attrib
"msi_attrib.pos" is only used for MSI (not MSI-X), and we already cache the
MSI capability offset in "dev->msi_cap".

Remove "pos" from the struct msi_attrib and use "dev->msi_cap" directly.

[bhelgaas: changelog, fix whitespace]
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2014-10-01 12:21:23 -06:00
Yijing Wang 81052769e4 PCI/MSI: Remove unused kobject from struct msi_desc
After commit 1c51b50c29 ("PCI/MSI: Export MSI mode using attributes, not
kobjects"), the kobject in struct msi_desc is unused.

Remove the unused struct kobject from struct msi_desc.

[bhelgaas: changelog]
Fixes: 1c51b50c29 ("PCI/MSI: Export MSI mode using attributes, not kobjects")
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-10-01 12:21:23 -06:00
Alexander Gordeev ad975ebad4 PCI/MSI: Remove arch_msi_check_device()
No architectures implement arch_msi_check_device() or the struct msi_chip
.check_device() method, so remove them.

Remove the "type" parameter to pci_msi_check_device() because it was only
used to call arch_msi_check_device() and is no longer needed.

[bhelgaas: changelog, split to separate patch]
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2014-10-01 12:21:14 -06:00
Mike Turquette a797900950 Allow parent rate changes for i2s on rk3288
and rockchip as well as s3c24xx restart handlers.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABCAAGBQJUK/GYAAoJEPOmecmc0R2Bnc8IAJY3NpUhN2It0WqL1meeIDr7
 jGVphxhiEsAlXMbHyO0V/LhUTbGtjfDotd43Y84cZW43R7G2ErvvBsWMKCff2xIB
 cVtjXJ8JI+s2T/gBEssZDnhh1dcP04fnZ2LMdLCGudnmn2AyR2AXQQBqFiO+kQWe
 0x2dsGgcQPn93X9i9sT03BKJTZjRoCZkUh0aJ2y+yL9Y5A9CNXCVGfJ5E1GNw3Px
 ffeKA0OsOhUMw9aqb8fLvDuRd0mo1i+BSCaOeNTlwzASmxCUJ9MOOKtF/Khx7x2r
 6N3w/EN4F8R+qJ/gOyZR9I3SrnLU1rMBOHvaSxDJb0RXAq5AROvQS1t4GiF2A68=
 =bvGz
 -----END PGP SIGNATURE-----

Merge tag 'v3.18-rockchip-clk2' of git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip into clk-next

Allow parent rate changes for i2s on rk3288
and rockchip as well as s3c24xx restart handlers.
2014-10-01 11:19:10 -07:00
Heiko Stuebner aec1d96c53 Immutable branch with restart handler patches for v3.18
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUJQ8/AAoJEMsfJm/On5mBMNgP+QEUHpRKJaOGU3jX/ftHH/t3
 EoNUx7lZt6Q0c9MB2ySAxILYpWUujc9N0tDkRDyW7mTWunF8gEGiRN+iKaSbzcUN
 Y4VffRAbxBasIaBqRtpDl08ycODh6Xu1t8sAao03DdhnMNLGNNO79s3UFHsubdTC
 cXx9mfYR/2SHV/0BXiFvKi8ovdqUspdp9cyZO/qc0PVFGbsADx3MNGGzkvWfgvcE
 6vXnKnUkZrNl5JPiG77kTKZnDsjEMXggmA9DGWKijFCJjGIbuLiuIDf63Zp+eQ52
 mJMRA+ViP/dDgAxY1dkWBcF5nOBT1vTYwLfy69jEoQeHzcomiHVoDKmCSBOpeAEH
 G8VoasWKWYpYnlcOJb+XgkA3QTe6mOPgAPzNsbYr0Ep7hMFw66mOQgKbgi6k4Qts
 HHimG9pnBYpPlBUfvNh+6K4dHAm0C2IyoZyMhKWsyFH6hkhS8TVM8j0gPR8rTTmk
 0a9/e2vxcFnfBe3UAJaqzWRVFsBkOHrTNpG1hvID3Oq8IeywSBXw2VMSR93+mwaB
 sa/GCZKlqHGpOfmtILlhiXQX0E/tTHmcrI2VqyCpX0J2CW+MiGvkcGOwKHOJciSA
 Cj9D68y837QU/DCpMQ6ec/5wqWqZKz8yQb8kxb6vJcL19JcVKdAiPzbuOI49C3Ux
 YxDWoUutzDfVoUD5RhcJ
 =cP1w
 -----END PGP SIGNATURE-----

Merge tag 'tags/restart-handler-for-v3.18' into v3.18-next/cpuclk

Immutable branch with restart handler patches for v3.18
2014-10-01 11:04:37 +02:00
Benjamin Tissoires 7704ac9373 HID: wacom: implement generic HID handling for pen generic devices
ISDv4 and v5 are plain HID devices. We can directly implement a generic
HID parsing/handling and remove the need to manually add those PID in
the list of supported devices.

This patch implements the pen support only. The finger part will come in
a later patch.

To be properly notified of an .event() and a .report(), we need to force
hid-core to go through the HID parsing. By default, wacom.ko binds only
hidraw, so the hid parsing is not done by hid-core. When a true HID device
is there, we add the flag HID_CLAIMED_DRIVER to hid->claimed which will
force hid-core to parse the incoming reports.
(Note that this can be easily backported by directly setting the .claimed
flag to HID_CLAIMED_DRIVER even if hid-core does not support
HID_CONNECT_DRIVER)

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Jason Gerecke <killertofu@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2014-10-01 09:11:23 +02:00
Liviu Dudau 8b921acfef PCI: Add pci_remap_iospace() to map bus I/O resources
Add pci_remap_iospace() to map bus I/O resources into the CPU virtual
address space.  Architectures with special needs may provide their own
version, but most should be able to use this one.

This function is useful for PCI host bridge drivers that need to map the
PCI I/O resources into virtual memory space.

[bhelgaas: phys_addr description, drop temporary "err" variable]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
CC: Arnd Bergmann <arnd@arndb.de>
2014-09-30 17:08:57 -06:00
Liviu Dudau cbe4097f8a of/pci: Add support for parsing PCI host bridge resources from DT
Provide a function to parse the PCI DT ranges that can be used to create a
pci_host_bridge structure together with its associated bus.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
[make io_base parameter optional]
Signed-off-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
2014-09-30 17:08:57 -06:00
Liviu Dudau 41e5c0f81d of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()
Add pci_get_new_domain_nr() to allocate a new domain number and
of_get_pci_domain_nr() to retrieve the PCI domain number of a given device
from DT.  Host bridge drivers or architecture-specific code can choose to
implement their PCI domain number policy using these two functions.

Using of_get_pci_domain_nr() guarantees a stable PCI domain number on every
boot provided that all host bridge controllers are assigned a number in the
device tree using "linux,pci-domain" property.  Mixing use of
pci_get_new_domain_nr() and of_get_pci_domain_nr() is not recommended as it
can lead to potentially conflicting domain numbers being assigned to root
buses behind different host bridges.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
2014-09-30 17:08:57 -06:00
Catalin Marinas 670ba0c888 PCI: Add generic domain handling
The handling of PCI domains (or PCI segments in ACPI speak) is usually a
straightforward affair but its implementation is currently left to the
architectural code, with pci_domain_nr(b) querying the value of the domain
associated with bus b.

This patch introduces CONFIG_PCI_DOMAINS_GENERIC as an option that can be
selected if an architecture wants a simple implementation where the value
of the domain associated with a bus is stored in struct pci_bus.

The architectures that select CONFIG_PCI_DOMAINS_GENERIC will then have to
implement pci_bus_assign_domain_nr() as a way of setting the domain number
associated with a root bus.  All child buses except the root bus will
inherit the domain_nr value from their parent.

Signed-off-by: Catalin Marinas <Catalin.Marinas@arm.com>
[Renamed pci_set_domain_nr() to pci_bus_assign_domain_nr()]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Arnd Bergmann <arnd@arndb.de>
2014-09-30 17:08:57 -06:00
Liviu Dudau 0b0b0893d4 of/pci: Fix the conversion of IO ranges into IO resources
The ranges property for a host bridge controller in DT describes the
mapping between the PCI bus address and the CPU physical address.  The
resources framework however expects that the IO resources start at a pseudo
"port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.  The
conversion from PCI ranges to resources failed to take that into account,
returning a CPU physical address instead of a port number.

Also fix all the drivers that depend on the old behaviour by fetching the
CPU physical address based on the port number where it is being needed.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Thierry Reding <thierry.reding@gmail.com>
CC: Simon Horman <horms@verge.net.au>
CC: Catalin Marinas <catalin.marinas@arm.com>
2014-09-30 17:08:40 -06:00
Trond Myklebust b4b56796fe Merge branch 'client-4.2' into linux-next
Merge NFSv4.2 client SEEK implementation from Anna

* client-4.2: (55 commits)
  NFS: Implement SEEK
  NFSD: Implement SEEK
  NFSD: Add generic v4.2 infrastructure
  svcrdma: advertise the correct max payload
  nfsd: introduce nfsd4_callback_ops
  nfsd: split nfsd4_callback initialization and use
  nfsd: introduce a generic nfsd4_cb
  nfsd: remove nfsd4_callback.cb_op
  nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence
  nfsd: fix nfsd4_cb_recall_done error handling
  nfsd4: clarify how grace period ends
  nfsd4: stop grace_time update at end of grace period
  nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients
  nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls
  nfsd: serialize nfsdcltrack upcalls for a particular client
  nfsd: pass extra info in env vars to upcalls to allow for early grace period end
  nfsd: add a v4_end_grace file to /proc/fs/nfsd
  lockd: add a /proc/fs/lockd/nlm_end_grace file
  nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE
  nfsd: remove redundant boot_time parm from grace_done client tracking op
  ...
2014-09-30 17:22:02 -04:00
Martin K. Petersen c611529e7c sd: Honor block layer integrity handling flags
A set of flags introduced in the block layer enable better control over
how protection information is handled. These flags are useful for both
error injection and data recovery purposes. Checking can be enabled and
disabled for controller and disk, and the guard tag format is now a
per-I/O property.

Update sd_protect_op to communicate the relevant information to the
low-level device driver via a set of flags in scsi_cmnd.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-30 15:17:35 -06:00
Majd Dibbiny e1c00e10e9 net/mlx4_core: New init and exit flow for mlx4_core
In the new flow, we separate the pci initialization and teardown
from the initialization and teardown of the other resources.

__mlx4_init_one handles the pci resources initialization. It then
calls mlx4_load_one to initialize the remainder of the resources.

When removing a device, mlx4_remove_one is invoked. However, now
mlx4_remove_one calls mlx4_unload_one to free all the resources except the pci
resources. When mlx4_unload_one returns, mlx4_remove_one then frees the
pci resources.

The above separation will allow us to implement 'reset flow' in the future.
It will also enable more EQs for VFs and is a pre-step to the modern API to
enable/disable SRIOV.

Also added nvfs; an integer array of size MLX4_MAX_PORTS + 1; to the mlx4_dev
struct. This new field is used to avoid parsing the num_vfs module parameter
each time the mlx4_restart_one is called.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-30 16:27:49 -04:00
Anna Schumaker 1c6dcbe5ce NFS: Implement SEEK
The SEEK operation is used when an application makes an lseek call with
either the SEEK_HOLE or SEEK_DATA flags set.  I fall back on
nfs_file_llseek() if the server does not have SEEK support.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-09-30 16:24:56 -04:00
Trond Myklebust 4a3a0ebad1 Merge commit '24bab491220f' into client-4.2
- Pull in patch 'NFSD: Implement SEEK' from Bruce's nfsd-next tree
  for dependencies.
2014-09-30 16:23:39 -04:00
Liviu Dudau 83bbde1cc0 of/pci: Move of_pci_range_to_resource() to of/address.c
We need to enhance of_pci_range_to_resources() enough that it won't make
sense for it to be inline anymore.  Move it to drivers/of/address.c, under
#ifdef CONFIG_PCI.

of_address.h previously implemented of_pci_range_to_resources()
unconditionally, regardless of any config options.  The implementation in
address.c is defined only when CONFIG_OF_ADDRESS=y and CONFIG_PCI=y,
so add a dummy version to avoid build errors when CONFIG_OF or
CONFIG_OF_ADDRESS is not defined.

[bhelgaas: drop extra detail from changelog, move def under CONFIG_PCI,
add dummy of_pci_range_to_resource() for build errors (from Arnd)]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: Grant Likely <grant.likely@linaro.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Catalin Marinas <catalin.marinas@arm.com>
2014-09-30 13:29:09 -06:00
Rafael J. Wysocki a8d46b9e4e ACPI / sleep: Rework the handling of ACPI GPE wakeup from suspend-to-idle
The ACPI GPE wakeup from suspend-to-idle is currently based on using
the IRQF_NO_SUSPEND flag for the ACPI SCI, but that is problematic
for a couple of reasons.  First, in principle the ACPI SCI may be
shared and IRQF_NO_SUSPEND does not really work well with shared
interrupts.  Second, it may require the ACPI subsystem to special-case
the handling of device notifications depending on whether or not
they are received during suspend-to-idle in some places which would
lead to fragile code.  Finally, it's better the handle ACPI wakeup
interrupts consistently with wakeup interrupts from other sources.

For this reason, remove the IRQF_NO_SUSPEND flag from the ACPI SCI
and use enable_irq_wake()/disable_irq_wake() with it instead, which
requires two additional platform hooks to be added to struct
platform_freeze_ops.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-09-30 21:06:07 +02:00
Rafael J. Wysocki 2a8a8ce651 PM / sleep: Export dpm_suspend_late/noirq() and dpm_resume_early/noirq()
Subsequent change sets will add platform-related operations between
dpm_suspend_late() and dpm_suspend_noirq() as well as between
dpm_resume_noirq() and dpm_resume_early() in suspend_enter(), so
export these functions for suspend_enter() to be able to call them
separately and split the invocations of dpm_suspend_end() and
dpm_resume_start() in there accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-09-30 21:05:59 +02:00
Rafael J. Wysocki e4cb0c9e92 Merge branch 'pm-genirq' into acpi-pm 2014-09-30 20:46:13 +02:00
Hauke Mehrtens 2101e533f4 bcma: register bcma as device tree driver
This driver is used by the bcm53xx ARM SoC code. Now it is possible to
give the address of the chipcommon core in device tree and bcma will
search for all the other cores.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
2014-09-30 13:17:14 -04:00
Liviu Dudau 41f8bba7f5 of/pci: Add pci_register_io_range() and pci_pio_to_address()
Some architectures do not have a simple view of the PCI I/O space and
instead use a range of CPU addresses that map to bus addresses.  For some
architectures these ranges will be expressed by OF bindings in a device
tree file.

This patch introduces a pci_register_io_range() helper function with a
generic implementation that can be used by such architectures to keep track
of the I/O ranges described by the PCI bindings.  If the PCI_IOBASE macro
is not defined, that signals lack of support for PCI and we return an
error.

In order to retrieve the CPU address associated with an I/O port, a new
helper function pci_pio_to_address() is introduced.  This will search in
the list of ranges registered with pci_register_io_range() and return the
CPU address that corresponds to the given port.

[arnd: add dummy !CONFIG_OF pci_pio_to_address() to fix build errors]
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
CC: Grant Likely <grant.likely@linaro.org>
2014-09-30 09:42:45 -06:00
James Morris 6c8ff877cd Merge commit 'v3.16' into next 2014-10-01 00:44:04 +10:00
Mark Brown ad21edcdb2 Merge remote-tracking branches 'regulator/topic/tps65217', 'regulator/topic/tps65910' and 'regulator/topic/voltage-ev' into regulator-next 2014-09-30 13:50:31 +01:00
Mark Brown 6d9deb7ad4 Merge remote-tracking branches 'regulator/topic/rk808', 'regulator/topic/rn5t618' and 'regulator/topic/samsung' into regulator-next 2014-09-30 13:50:30 +01:00
Mark Brown 64b285ad7b Merge remote-tracking branches 'regulator/topic/max1586', 'regulator/topic/max77802' and 'regulator/topic/of' into regulator-next 2014-09-30 13:50:29 +01:00
Mark Brown a81bf3c4fc Merge remote-tracking branches 'regulator/topic/drivers', 'regulator/topic/enable', 'regulator/topic/fan53555', 'regulator/topic/hi6421' and 'regulator/topic/isl9305' into regulator-next 2014-09-30 13:50:27 +01:00
Mark Brown 95528a55db Merge remote-tracking branches 'regulator/topic/as3711', 'regulator/topic/axp20x', 'regulator/topic/bcm590xx' and 'regulator/topic/da9211' into regulator-next 2014-09-30 13:50:25 +01:00
Mike Turquette 82de1bc86c Merge branch 'for-v3.18/ti-clk-driver' of github.com:t-kristo/linux-pm into clk-next 2014-09-29 23:38:59 -07:00
Doug Anderson d8e0a86f97 i2c: cros_ec: Remove EC_I2C_FLAG_10BIT
In <https://lkml.org/lkml/2014/6/10/265> pointed out that the 10-bit
flag in the cros_ec_tunnel was useless.  It went into a 16-bit flags
field but was defined at (1 << 16).

Since we have no 10-bit i2c devices on the other side of the tunnel on
any known devices this was never a problem.  Until we do it makes
sense to remove this code.  On the EC side the code to handle this
flag was removed in <https://chromium-review.googlesource.com/204162>.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
2014-09-30 08:06:02 +02:00
Sebastian Andrzej Siewior baeb7ef349 tty: serial: 8250: use 32bit variable for rpm_tx_active
The kbuild test robot wrote me:
|  make.cross ARCH=powerpc
|>> ERROR: ".__xchg_called_with_bad_pointer" [drivers/tty/serial/8250/8250.ko] undefined!

The generic implementation of xchg() on arm and x86 works for variables of
size one bye (char). According to the report powerpc does not support
xchg() for one byte sized variables and looking further it seems also to
be the same case for sparc and tile (or for 10 out of 26 architectures
which provide a custom implementation).
For that reason I increase the size of the variable from one to four
bytes to get it work on powerpc (and the others).

Reported-By: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-09-29 18:20:38 -07:00
Ulf Hansson f48c767ce8 PM / Domains: Move dev_pm_domain_attach|detach() to pm_domain.h
The commit 46420dd73b (PM / Domains: Add APIs to attach/detach a PM
domain for a device) started using errno values in pm.h header file.
It also failed to include the header for these, thus it caused
compiler errors.

Instead of including the errno header to pm.h, let's move the functions
to pm_domain.h, since it's a better match.

Fixes: 46420dd73b (PM / Domains: Add APIs to attach/detach a PM domain for a device)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2014-09-30 01:16:44 +02:00
Tan, Raymond 4bcfda0993 i2c: designware: add support of platform data to set I2C mode
Use the platform data to set the clk_freq when there is no DT configuration
available. The clk_freq in turn will determine the I2C speed mode.

In Quark, there is currently no other configuration mechanism other than
board files.

Signed-off-by: Raymond Tan <raymond.tan@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Hock Leong Kweh <hock.leong.kweh@intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
2014-09-29 22:41:17 +02:00
Michael Braun 79cf79abce macvlan: add source mode
This patch adds a new mode of operation to macvlan, called "source".
It allows one to set a list of allowed mac address, which is used
to match against source mac address from received frames on underlying
interface.
This enables creating mac based VLAN associations, instead of standard
port or tag based. The feature is useful to deploy 802.1x mac based
behavior, where drivers of underlying interfaces doesn't allows that.

Configuration is done through the netlink interface using e.g.:
 ip link add link eth0 name macvlan0 type macvlan mode source
 ip link add link eth0 name macvlan1 type macvlan mode source
 ip link set link dev macvlan0 type macvlan macaddr add 00:11:11:11:11:11
 ip link set link dev macvlan0 type macvlan macaddr add 00:22:22:22:22:22
 ip link set link dev macvlan0 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:44:44:44:44:44

This allows clients with MAC addresses 00:11:11:11:11:11,
00:22:22:22:22:22 to be part of only VLAN associated with macvlan0
interface. Clients with MAC addresses 00:44:44:44:44:44 with only VLAN
associated with macvlan1 interface. And client with MAC address
00:33:33:33:33:33 to be associated with both VLANs.

Based on work of Stefan Gula <steweg@gmail.com>

v8: last version of Stefan Gula for Kernel 3.2.1
v9: rework onto linux-next 2014-03-12 by Michael Braun
    add MACADDR_SET command, enable to configure mac for source mode
    while creating interface
v10:
  - reduce indention level
  - rename source_list to source_entry
  - use aligned 64bit ether address
  - use hash_64 instead of addr[5]
v11:
  - rebase for 3.14 / linux-next 20.04.2014
v12
  - rebase for linux-next 2014-09-25

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 15:37:01 -04:00
David S. Miller 852248449c Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:

====================
pull request: netfilter/ipvs updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:

1) Four patches to make the new nf_tables masquerading support
   independent of the x_tables infrastructure. This also resolves a
   compilation breakage if the masquerade target is disabled but the
   nf_tables masq expression is enabled.

2) ipset updates via Jozsef Kadlecsik. This includes the addition of the
   skbinfo extension that allows you to store packet metainformation in the
   elements. This can be used to fetch and restore this to the packets through
   the iptables SET target, patches from Anton Danilov.

3) Add the hash:mac set type to ipset, from Jozsef Kadlecsick.

4) Add simple weighted fail-over scheduler via Simon Horman. This provides
   a fail-over IPVS scheduler (unlike existing load balancing schedulers).
   Connections are directed to the appropriate server based solely on
   highest weight value and server availability, patch from Kenny Mathis.

5) Support IPv6 real servers in IPv4 virtual-services and vice versa.
   Simon Horman informs that the motivation for this is to allow more
   flexibility in the choice of IP version offered by both virtual-servers
   and real-servers as they no longer need to match: An IPv4 connection
   from an end-user may be forwarded to a real-server using IPv6 and
   vice versa. No ip_vs_sync support yet though. Patches from Alex Gartrell
   and Julian Anastasov.

6) Add global generation ID to the nf_tables ruleset. When dumping from
   several different object lists, we need a way to identify that an update
   has ocurred so userspace knows that it needs to refresh its lists. This
   also includes a new command to obtain the 32-bits generation ID. The
   less significant 16-bits of this ID is also exposed through res_id field
   in the nfnetlink header to quickly detect the interference and retry when
   there is no risk of ID wraparound.

7) Move br_netfilter out of the bridge core. The br_netfilter code is
   built in the bridge core by default. This causes problems of different
   kind to people that don't want this: Jesper reported performance drop due
   to the inconditional hook registration and I remember to have read complains
   on netdev from people regarding the unexpected behaviour of our bridging
   stack when br_netfilter is enabled (fragmentation handling, layer 3 and
   upper inspection). People that still need this should easily undo the
   damage by modprobing the new br_netfilter module.

8) Dump the set policy nf_tables that allows set parameterization. So
   userspace can keep user-defined preferences when saving the ruleset.
   From Arturo Borrero.

9) Use __seq_open_private() helper function to reduce boiler plate code
   in x_tables, From Rob Jones.

10) Safer default behaviour in case that you forget to load the protocol
   tracker. Daniel Borkmann and Florian Westphal detected that if your
   ruleset is stateful, you allow traffic to at least one single SCTP port
   and the SCTP protocol tracker is not loaded, then any SCTP traffic may
   be pass through unfiltered. After this patch, the connection tracking
   classifies SCTP/DCCP/UDPlite/GRE packets as invalid if your kernel has
   been compiled with support for these modules.
====================

Trivially resolved conflict in include/linux/skbuff.h, Eric moved some
netfilter skbuff members around, and the netfilter tree adjusted the
ifdef guards for the bridging info pointer.

Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 14:46:53 -04:00
Michael Grzeschik c51da42a63 ARCNET: add support for multi interfaces on com20020
The com20020-pci driver is currently designed to instance
one netdev with one pci device. This patch adds support to
instance many cards with one pci device, depending on the device
data in the private data.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 14:36:26 -04:00
Michael Grzeschik 8c14f9c703 ARCNET: add com20020 PCI IDs with metadata
This patch adds metadata for the com20020 to prepare for devices with
multiple io address areas with multi card interfaces.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 14:36:26 -04:00
Anna Schumaker 24bab49122 NFSD: Implement SEEK
This patch adds server support for the NFS v4.2 operation SEEK, which
returns the position of the next hole or data segment in a file.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2014-09-29 14:35:20 -04:00
Anna Schumaker 87a15a8090 NFSD: Add generic v4.2 infrastructure
It's cleaner to introduce everything at once and have the server reply
with "not supported" than it would be to introduce extra operations when
implementing a specific one in the middle of the list.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2014-09-29 14:35:19 -04:00
Eric Dumazet b193722731 net: reorganize sk_buff for faster __copy_skb_header()
With proliferation of bit fields in sk_buff, __copy_skb_header() became
quite expensive, showing as the most expensive function in a GSO
workload.

__copy_skb_header() performance is also critical for non GSO TCP
operations, as it is used from skb_clone()

This patch carefully moves all the fields that were not copied in a
separate zone : cloned, nohdr, fclone, peeked, head_frag, xmit_more

Then I moved all other fields and all other copied fields in a section
delimited by headers_start[0]/headers_end[0] section so that we
can use a single memcpy() call, inlined by compiler using long
word load/stores.

I also tried to make all copies in the natural orders of sk_buff,
to help hardware prefetching.

I made sure sk_buff size did not change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 12:27:20 -04:00
Will Deacon c02607aad2 iommu: introduce domain attribute for nesting IOMMUs
Some IOMMUs, such as the ARM SMMU, support two stages of translation.
The idea behind such a scheme is to allow a guest operating system to
use the IOMMU for DMA mappings in the first stage of translation, with
the hypervisor then installing mappings in the second stage to provide
isolation of the DMA to the physical range assigned to that virtual
machine.

In order to allow IOMMU domains to be used for second-stage translation,
this patch adds a new iommu_attr (IOMMU_ATTR_NESTING) for setting
second-stage domains prior to device attach. The attribute can also be
queried to see if a domain is actually making use of nesting.

Acked-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2014-09-29 10:05:06 -06:00
Tomas Winkler 1f180359f4 mei: remove include to pci header from mei module files
Remove inclusion of linux/pci.h in mei layer
however we need to include the headers that before
got included implicitly from linux/pci.h.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-09-29 11:56:02 -04:00
Sergei Shtylyov 0043325495 usb: hcd: add generic PHY support
Add the generic PHY support, analogous to the USB PHY support. Intended it to be
used with the PCI EHCI/OHCI drivers and the xHCI platform driver.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-09-29 11:54:02 -04:00
Antoine Tenart 3d46e73dfd usb: rename phy to usb_phy in HCD
The USB PHY member of the HCD structure is renamed to 'usb_phy' and
modifications are done in all drivers accessing it.
This is in preparation to adding the generic PHY support.

Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
[Sergei: added missing 'drivers/usb/misc/lvstest.c' file, resolved rejects,
updated changelog.]
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-09-29 11:52:59 -04:00
Tero Kristo c08ee14cc6 clk: ti: change clock init to use generic of_clk_init
Previously, the TI clock driver initialized all the clocks hierarchically
under each separate clock provider node. Now, each clock that requires
IO access will instead check their parent node to find out which IO range
to use.

This patch allows the TI clock driver to use a few new features provided
by the generic of_clk_init, and also allows registration of clock nodes
outside the clock hierarchy (for example, any external clocks.)

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Cc: Mike Turquette <mturquette@linaro.org>
Cc: Paul Walmsley <paul@pwsan.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Cc: Jyri Sarha <jsarha@ti.com>
Cc: Stefan Assmann <sassmann@kpanic.de>
Acked-by: Tony Lindgren <tony@atomide.com>
2014-09-29 11:51:13 +03:00
Eric Dumazet 3d9a0d2f82 dql: dql_queued() should write first to reduce bus transactions
While doing high throughput test on a BQL enabled NIC,
I found a very high cost in ndo_start_xmit() when accessing BQL data.

It turned out the problem was caused by compiler trying to be
smart, but involving a bad MESI transaction :

  0.05 │  mov    0xc0(%rax),%edi    // LOAD dql->num_queued
  0.48 │  mov    %edx,0xc8(%rax)    // STORE dql->last_obj_cnt = count
 58.23 │  add    %edx,%edi
  0.58 │  cmp    %edi,0xc4(%rax)
  0.76 │  mov    %edi,0xc0(%rax)    // STORE dql->num_queued += count
  0.72 │  js     bd8

I got an incredible 10 % gain [1] by making sure cpu do not attempt
to get the cache line in Shared mode, but directly requests for
ownership.

New code :
	mov    %edx,0xc8(%rax)  // STORE dql->last_obj_cnt = count
	add    %edx,0xc0(%rax)  // RMW   dql->num_queued += count
	mov    0xc4(%rax),%ecx  // LOAD dql->adj_limit
	mov    0xc0(%rax),%edx  // LOAD dql->num_queued
	cmp    %edx,%ecx

The TX completion was running from another cpu, with high interrupts
rate.

Note that I am using barrier() as a soft hint, as mb() here could be
too heavy cost.

[1] This was a netperf TCP_STREAM with TSO disabled, but GSO enabled.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-29 00:04:55 -04:00
Dan Williams 7bced39751 net_dma: simple removal
Per commit "77873803363c net_dma: mark broken" net_dma is no longer used
and there is no plan to fix it.

This is the mechanical removal of bits in CONFIG_NET_DMA ifdef guards.
Reverting the remainder of the net_dma induced changes is deferred to
subsequent patches.

Marked for stable due to Roman's report of a memory leak in
dma_pin_iovec_pages():

    https://lkml.org/lkml/2014/9/3/177

Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: David Whipple <whipple@securedatainnovations.ch>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: <stable@vger.kernel.org>
Reported-by: Roman Gushchin <klamm@yandex-team.ru>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2014-09-28 07:05:16 -07:00
Linus Torvalds 1e3827bf8a Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs fixes from Al Viro:
 "Assorted fixes + unifying __d_move() and __d_materialise_dentry() +
  minimal regression fix for d_path() of victims of overwriting rename()
  ported on top of that"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  vfs: Don't exchange "short" filenames unconditionally.
  fold swapping ->d_name.hash into switch_names()
  fold unlocking the children into dentry_unlock_parents_for_move()
  kill __d_materialise_dentry()
  __d_materialise_dentry(): flip the order of arguments
  __d_move(): fold manipulations with ->d_child/->d_subdirs
  don't open-code d_rehash() in d_materialise_unique()
  pull rehashing and unlocking the target dentry into __d_materialise_dentry()
  ufs: deal with nfsd/iget races
  fuse: honour max_read and max_write in direct_io mode
  shmem: fix nlink for rename overwrite directory
2014-09-27 17:05:14 -07:00
Linus Torvalds 6111da3432 Merge branch 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "This is quite late but these need to be backported anyway.

  This is the fix for a long-standing cpuset bug which existed from
  2009.  cpuset makes use of PF_SPREAD_{PAGE|SLAB} flags to modify the
  task's memory allocation behavior according to the settings of the
  cpuset it belongs to; unfortunately, when those flags have to be
  changed, cpuset did so directly even whlie the target task is running,
  which is obviously racy as task->flags may be modified by the task
  itself at any time.  This obscure bug manifested as corrupt
  PF_USED_MATH flag leading to a weird crash.

  The bug is fixed by moving the flag to task->atomic_flags.  The first
  two are prepatory ones to help defining atomic_flags accessors and the
  third one is the actual fix"

* 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags
  sched: add macros to define bitops for task atomic flags
  sched: fix confusing PFA_NO_NEW_PRIVS constant
2014-09-27 16:45:33 -07:00
Mike Turquette 4dc7ed32f3 Allwinner Clocks Additions for 3.18
The most important part of this serie is the addition of the phase API to
 handle the MMC clocks in the Allwinner SoCs.
 
 Apart from that, the A23 gained a new mbus driver, and there's a fix for a
 incorrect divider table on the APB0 clock.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUJm3zAAoJEBx+YmzsjxAgxJwQAJk3+Oq3J54jzRxKLGjUpfy9
 Ma9p/78ZSnYlYWrEn62vzu7sGeMJsPo4Lsmy+Hch2r765+PzFZw9oDaxjFT5poQy
 Mv8F7Uyetc99sGAfmg/fKnzgQpp1t+9+kB42cV6lzjXolqX/ACcIjzFOzROXEF9B
 2bnQ3RwXqvQhKKryDBg9+hJYt1R15d4SxQ7Rn6lb6WsZTxjGVO0cvvU3tp4QGQgg
 ZDUkJNLzLYdMK9XUNyqreatmz+HMxL5vYHeEWFz388ECp9DRUPT3MqlQcUqgSLlD
 eMqQPOnd5p5ZEUdB8qAAtf4kIbQTaVa7/4u37sE/+fogw6Pq/6a2Jqppl9aJWD7I
 PDFjxSMl77W5mQZSEanbc0a0qmqAqtZokDusP0bc0ETSZzmPVvohjW5Fa9Awyi0j
 PeN2bTglaFDPsHxKlQ31HF/e/almXkpiIXegeG0e/3VrGSrghFMQtqLEUXgVPu10
 4PV8x7O2ib1VVAowwOb10qGv0fLGC8UCqL9zXVNlCy268ijjKMlNyK3U1sllphba
 fWBYgtg9+1YHONI1SewuYibAqROC7ICDXiqDkJVb6UWmO39HBcOFDb3HJ0EIj8T4
 9v1clkVy1vONIqfvi1SeTekLovpROOxhxGtyXTpdx5qdlVhBjkEsNVHc5jh6BPHr
 o9TlBnnmIPajvF9wMN+H
 =ZkI9
 -----END PGP SIGNATURE-----

Merge tag 'sunxi-clocks-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mripard/linux into clk-next

Allwinner Clocks Additions for 3.18

The most important part of this serie is the addition of the phase API to
handle the MMC clocks in the Allwinner SoCs.

Apart from that, the A23 gained a new mbus driver, and there's a fix for a
incorrect divider table on the APB0 clock.
2014-09-27 12:52:33 -07:00
Martin K. Petersen 2341c2f8c3 block: Add T10 Protection Information functions
The T10 Protection Information format is also used by some devices that
do not go through the SCSI layer (virtual block devices, NVMe). Relocate
the relevant functions to a block layer library that can be used without
involving SCSI.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:59 -06:00
Martin K. Petersen 4eaf99bead block: Don't merge requests if integrity flags differ
We'd occasionally merge requests with conflicting integrity flags.
Introduce a merge helper which checks that the requests have compatible
integrity payloads.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:57 -06:00
Martin K. Petersen aae7df5019 block: Integrity checksum flag
Make the choice of checksum a per-I/O property by introducing a flag
that can be inspected by the SCSI layer. There are several reasons for
this:

 1. It allows us to switch choice of checksum without unloading and
    reloading the HBA driver.

 2. During error recovery we need to be able to tell the HBA that
    checksums read from disk should not be verified and converted to IP
    checksums.

 3. For error injection purposes we need to be able to write a bad guard
    tag to storage. Since the storage device only supports T10 CRC we
    need to be able to disable IP checksum conversion on the HBA.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:55 -06:00
Martin K. Petersen b1f0138857 block: Relocate bio integrity flags
Move flags affecting the integrity code out of the bio bi_flags and into
the block integrity payload.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:54 -06:00
Martin K. Petersen 3aec2f41a8 block: Add a disk flag to block integrity profile
So far we have relied on the app tag size to determine whether a disk
has been formatted with T10 protection information or not. However, not
all target devices provide application tag storage.

Add a flag to the block integrity profile that indicates whether the
disk has been formatted with protection information.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:51 -06:00
Martin K. Petersen 8288f496eb block: Add prefix to block integrity profile flags
Add a BLK_ prefix to the integrity profile flags. Also rename the flags
to be more consistent with the generate/verify terminology in the rest
of the integrity code.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:51 -06:00
Martin K. Petersen 1859308853 block: Clean up the code used to generate and verify integrity metadata
Instead of the "operate" parameter we pass in a seed value and a pointer
to a function that can be used to process the integrity metadata. The
generation function is changed to have a return value to fit into this
scheme.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:51 -06:00
Martin K. Petersen 3be91c4a3d block: Deprecate the use of the term sector in the context of block integrity
The protection interval is not necessarily tied to the logical block
size of a block device. Stop using the terms "sector" and "sectors".

Going forward we will use the term "seed" to describe the initial
reference tag value for a given I/O. "Interval" will be used to describe
the portion of the data buffer that a given piece of protection
information is associated with.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:50 -06:00
Martin K. Petersen 5f9378fa9c block: Remove bip_buf
bip_buf is not really needed so we can remove it.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:50 -06:00
Martin K. Petersen 8492b68bc4 block: Remove integrity tagging functions
None of the filesystems appear interested in using the integrity tagging
feature. Potentially because very few storage devices actually permit
using the application tag space.

Remove the tagging functions.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:50 -06:00
Martin K. Petersen 180b2f95dd block: Replace bi_integrity with bi_special
For commands like REQ_COPY we need a way to pass extra information along
with each bio. Like integrity metadata this information must be
available at the bottom of the stack so bi_private does not suffice.

Rename the existing bi_integrity field to bi_special and make it a union
so we can have different bio extensions for each class of command.

We previously used bi_integrity != NULL as a way to identify whether a
bio had integrity metadata or not. Introduce a REQ_INTEGRITY to be the
indicator now that bi_special can contain different things.

In addition, bio_integrity(bio) will now return a pointer to the
integrity payload (when applicable).

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:46 -06:00
Martin K. Petersen e7258c1a26 block: Get rid of bdev_integrity_enabled()
bdev_integrity_enabled() is only used by bio_integrity_enabled().
Combine these two functions.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-09-27 09:14:44 -06:00
Paolo Bonzini e77d99d4a4 Changes for KVM for arm/arm64 for 3.18
This includes a bunch of changes:
  - Support read-only memory slots on arm/arm64
  - Various changes to fix Sparse warnings
  - Correctly detect write vs. read Stage-2 faults
  - Various VGIC cleanups and fixes
  - Dynamic VGIC data strcuture sizing
  - Fix SGI set_clear_pend offset bug
  - Fix VTTBR_BADDR Mask
  - Correctly report the FSC on Stage-2 faults
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJUJWAdAAoJEEtpOizt6ddy9cMH+gIoUPnRJLe+PPcOOyxOx6pr
 +CnD/zAd0sLvxZLP/LBOzu99H3YrbO5kwI/172/8G1zUNI2hp6YxEEJaBCTHrz6l
 RwgLy7a3EMMY51nJo5w2dkFUo8cUX9MsHqMpl2Xb7Dvo2ZHp+nDqRjwRY6yi+t4V
 dWSJTRG6X+DIWyysij6jBtfKU6MpU+4NW3Zdk1fapf8QDkn+cBtV5X2QcmERCaIe
 A1j9hiGi43KA3XWeeePU3aVaxC2XUhTayP8VsfVxoNG2manaS6lqjmbif5ghs/0h
 rw7R3/Aj0MJny2zT016MkvKJKRukuVRD6e1lcYghqnSJhL2FossowZ9fHRADpqU=
 =QgU8
 -----END PGP SIGNATURE-----

Merge tag 'kvm-arm-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-next

Changes for KVM for arm/arm64 for 3.18

This includes a bunch of changes:
 - Support read-only memory slots on arm/arm64
 - Various changes to fix Sparse warnings
 - Correctly detect write vs. read Stage-2 faults
 - Various VGIC cleanups and fixes
 - Dynamic VGIC data strcuture sizing
 - Fix SGI set_clear_pend offset bug
 - Fix VTTBR_BADDR Mask
 - Correctly report the FSC on Stage-2 faults

Conflicts:
	virt/kvm/eventfd.c
	[duplicate, different patch where the kvm-arm version broke x86.
	 The kvm tree instead has the right one]
2014-09-27 11:03:33 +02:00
Maxime Ripard 9824cf73c3 clk: Add a function to retrieve phase
The current phase API doesn't look into the actual hardware to get the phase
value, but will rather get it from a variable only set by the set_phase
function.

This will cause issue when the client driver will never call the set_phase
function, where we can end up having a reported phase that will not match what
the hardware has been programmed to by the bootloader or what phase is
programmed out of reset.

Add a new get_phase function for the drivers to implement so that we can get
this value.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
2014-09-27 08:57:51 +02:00
Maxime Ripard 355bb165cd clk: Include of.h in clock-provider.h
CLK_OF_DECLARE relies on OF_DECLARE_1 that is defined in of.h. Fixes build
errors when one use CLK_OF_DECLARE but doesn't include of.h

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
2014-09-27 08:57:50 +02:00
Mike Turquette e59c5371fb clk: introduce clk_set_phase function & callback
A common operation for a clock signal generator is to shift the phase of
that signal. This patch introduces a new function to the clk.h API to
dynamically adjust the phase of a clock signal. Additionally this patch
introduces support for the new function in the common clock framework
via the .set_phase call back in struct clk_ops.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
2014-09-27 08:57:38 +02:00
Miklos Szeredi 2c80929c4c fuse: honour max_read and max_write in direct_io mode
The third argument of fuse_get_user_pages() "nbytesp" refers to the number of
bytes a caller asked to pack into fuse request. This value may be lesser
than capacity of fuse request or iov_iter.  So fuse_get_user_pages() must
ensure that *nbytesp won't grow.

Now, when helper iov_iter_get_pages() performs all hard work of extracting
pages from iov_iter, it can be done by passing properly calculated
"maxsize" to the helper.

The other caller of iov_iter_get_pages() (dio_refill_pages()) doesn't need
this capability, so pass LONG_MAX as the maxsize argument here.

Fixes: c9c37e2e63 ("fuse: switch to iov_iter_get_pages()")
Reported-by: Werner Baumann <werner.baumann@onlinehome.de>
Tested-by: Maxim Patlasov <mpatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-09-26 21:16:51 -04:00
Jyri Sarha c873d14d30 clk: add gpio gated clock
The added gpio-gate-clock is a basic clock that can be enabled and
disabled trough a gpio output. The DT binding document for the clock
is also added. For EPROBE_DEFER handling the registering of the clock
has to be delayed until of_clk_get() call time.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
2014-09-26 16:51:42 -07:00
Mike Turquette b6b2fe5b6e Tegra clk updates for 3.18
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJUHBvYAAoJEFFb18rurjwTyaoP/37XBxRXfN2mL+WroUC7rb4m
 qb7PKg0RTOjjWV6Dp2EzmXThxsy0sGRpML9ZNp6LYIqPOH4sBHNeRK0+Qf1dB1XZ
 /DySj4oqEgJ+x8GHTqceM5x4C5fwDrF4sl5uacYIZcA42ih3nY331zlJaNPpkSpx
 iP91SJsQghK9OpdnOTl8oOV9p6PIjFfn712GPIRL7PBIBkVswdkJHufozzvcX+mI
 O30/FGEGbQCmCs1pFXufFNwJQcOy0qK5zyZPhGbzyji9gkCXz4OQOqWLdE1kXbWY
 CDfolp0Xdr+hLN/6YkQBEkZSjvKzLZf8OOYmVsXWXplVDjLf9/2p/Kq33p8JGKUa
 R8BC17MtmKjNgJ7g0Zo870meRWDcsLOfr60Wu4MIhzSJKSVDp6oq0ZqYay10/ZuF
 iMv49Mcpbgl4d0D6DgJztMPYwQZrMOkTLByGo+s/UxB+gY2XtvUO/6mPchYfhYUX
 M7y28Im4j5pmTKw5WhulfDWTBUPul8tvqGLdB4i9zKOA0iszby7pnGzjKfZd4RYg
 FD5Byzd225X92R/kKxWPTv4SQBM7flVkfvJLbOV2SiedoCHcyLXJjtSgaFlE3XYY
 gUt9uqM5cwiw6hwPVzP1su02419L6cZYkCWOlVMNlMZ3fjd6js6NUtNjxH30tU7H
 TrwJ4tAYaIkuTw0jBnFE
 =WIPU
 -----END PGP SIGNATURE-----

Merge tag 'tegra-clk-3.18' of git://nv-tegra.nvidia.com/user/pdeschrijver/linux into clk-next

Tegra clk updates for 3.18
2014-09-26 16:09:39 -07:00
Eric Dumazet f4a775d144 net: introduce __skb_header_release()
While profiling TCP stack, I noticed one useless atomic operation
in tcp_sendmsg(), caused by skb_header_release().

It turns out all current skb_header_release() users have a fresh skb,
that no other user can see, so we can avoid one atomic operation.

Introduce __skb_header_release() to clearly document this.

This gave me a 1.5 % improvement on TCP_RR workload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:40:06 -04:00
David S. Miller 57219dc7bf Merge tag 'master-2014-09-16' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next
John W. Linville says:

====================
pull request: wireless-next 2014-09-22

Please pull this batch of updates intended for the 3.18 stream...

For the mac80211 bits, Johannes says:

"This time, I have some rate minstrel improvements, support for a very
small feature from CCX that Steinar reverse-engineered, dynamic ACK
timeout support, a number of changes for TDLS, early support for radio
resource measurement and many fixes. Also, I'm changing a number of
places to clear key memory when it's freed and Intel claims copyright
for code they developed."

For the bluetooth bits, Johan says:

"Here are some more patches intended for 3.18. Most of them are cleanups
or fixes for SMP. The only exception is a fix for BR/EDR L2CAP fixed
channels which should now work better together with the L2CAP
information request procedure."

For the iwlwifi bits, Emmanuel says:

"I fix here dvm which was broken by my last pull request. Arik
continues to work on TDLS and Luca solved a few issues in CT-Kill. Eyal
keeps digging into rate scaling code, more to come soon. Besides this,
nothing really special here."

Beyond that, there are the usual big batches of updates to ath9k, b43,
mwifiex, and wil6210 as well as a handful of other bits here and there.
Also, rtlwifi gets some btcoexist attention from Larry.

Please let me know if there are problems!
====================

Had to adjust the wil6210 code to comply with Joe Perches's recent
change in net-next to make the netdev_*() routines return void instead
of 'int'.

Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:39:24 -04:00
Joe Perches 6ea754eb76 net: Change netdev_<level> logging functions to return void
No caller or macro uses the return value so make all
the functions return void.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:17:17 -04:00
Alexei Starovoitov 17a5267067 bpf: verifier (add verifier core)
This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6

This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields

Kernel subsystem configures the verifier with two callbacks:

- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
  that provides information to the verifer which fields of 'ctx'
  are accessible (remember 'ctx' is the first argument to eBPF program)

- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
  returns argument constraints of kernel helper functions that eBPF program
  may call, so that verifier can checks that R1-R5 types match the prototype

More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:05:15 -04:00
Alexei Starovoitov 0246e64d9a bpf: handle pseudo BPF_LD_IMM64 insn
eBPF programs passed from userspace are using pseudo BPF_LD_IMM64 instructions
to refer to process-local map_fd. Scan the program for such instructions and
if FDs are valid, convert them to 'struct bpf_map' pointers which will be used
by verifier to check access to maps in bpf_map_lookup/update() calls.
If program passes verifier, convert pseudo BPF_LD_IMM64 into generic by dropping
BPF_PSEUDO_MAP_FD flag.

Note that eBPF interpreter is generic and knows nothing about pseudo insns.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-26 15:05:15 -04:00