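/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level: dirty-limit accounting, the balance_dirty_pages()
 * throttling path, pdflush writeout helpers and the page dirty/writeback
 * tag handling.  This copy carries DDE_LINUX adjustments to the
 * writeback intervals.
 */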

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>

/*
 * The maximum number of pages to write out in a single bdflush/kupdate
 * operation.
 */
#define MAX_WRITEBACK_PAGES	1024

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
 */
static inline long sync_writeback_pages(void)
{
	return ratelimit_pages + ratelimit_pages / 2;
}

/*
 * Start background writeback (via pdflush) at this percentage of
 * dirtyable memory.
 */
int dirty_background_ratio = 5;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage.
 */
int vm_dirty_ratio = 10;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks.  The DDE build uses a
 * fixed tick count rather than an HZ-based value.
 */
#ifndef DDE_LINUX
int dirty_writeback_interval = 5 * HZ;
#else
int dirty_writeback_interval = 1250;
#endif

/*
 * Write back data which is older than this; again a fixed tick count
 * in the DDE build.
 */
#ifndef DDE_LINUX
int dirty_expire_interval = 30 * HZ;
#else
int dirty_expire_interval = 7500;
#endif

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode".  Doubles as a timeout in
 * jiffies: a non-zero value means used as a timeout.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

static void background_writeout(unsigned long _min_pages);

/*
 * Scale the writeback cache size proportional to the relative writeout
 * speed of each BDI, and scale per-task dirty throttling by each task's
 * recent share of the dirtying.  Both use floating proportions whose
 * period is derived from the global dirty threshold.
 */
static struct prop_descriptor vm_completions;
static struct prop_descriptor vm_dirties;

/*
 * Couple the period to the dirty_ratio:
 *
 *   period/2 ~ roundup_pow_of_two(dirty limit)
 */
static int calc_period_shift(void)
{
	unsigned long dirty_total;

	if (vm_dirty_bytes)
		dirty_total = vm_dirty_bytes / PAGE_SIZE;
	else
		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
				100;
	return 2 + ilog2(dirty_total - 1);
}

/*
 * Update the period when the dirty threshold changes.
 */
static void update_completion_period(void)
{
	int shift = calc_period_shift();
	prop_change_shift(&vm_completions, shift);
	prop_change_shift(&vm_dirties, shift);
}

/*
 * The ratio and bytes forms of each limit are mutually exclusive: writing
 * one of them through its sysctl handler clears the other.
 */
int dirty_background_ratio_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		update_completion_period();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
		struct file *filp, void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		update_completion_period();
		vm_dirty_ratio = 0;
	}
	return ret;
}

/*
 * Increment the BDI's writeout completion count and the global writeout
 * completion count.  Called from test_clear_page_writeback().
 */
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
	__prop_inc_percpu_max(&vm_completions, &bdi->completions,
			      bdi->max_prop_frac);
}

void bdi_writeout_inc(struct backing_dev_info *bdi)
{
	unsigned long flags;

	local_irq_save(flags);
	__bdi_writeout_inc(bdi);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

void task_dirty_inc(struct task_struct *tsk)
{
	prop_inc_single(&vm_dirties, &tsk->dirties);
}

/*
 * Obtain an accurate fraction of the BDI's portion.
 */
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
		long *numerator, long *denominator)
{
	if (bdi_cap_writeback_dirty(bdi)) {
		prop_fraction_percpu(&vm_completions, &bdi->completions,
				numerator, denominator);
	} else {
		*numerator = 0;
		*denominator = 1;
	}
}

/*
 * Clip the earned share of dirty pages to that which is actually available.
 * This avoids exceeding the total dirty_limit when the floating averages
 * fluctuate too quickly.
 */
static void
clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty)
{
	long avail_dirty;

	avail_dirty = dirty -
		(global_page_state(NR_FILE_DIRTY) +
		 global_page_state(NR_WRITEBACK) +
		 global_page_state(NR_UNSTABLE_NFS) +
		 global_page_state(NR_WRITEBACK_TEMP));

	if (avail_dirty < 0)
		avail_dirty = 0;

	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
		       bdi_stat(bdi, BDI_WRITEBACK);

	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}

static inline void task_dirties_fraction(struct task_struct *tsk,
		long *numerator, long *denominator)
{
	prop_fraction_single(&vm_dirties, &tsk->dirties,
				numerator, denominator);
}

/*
 * Scale the dirty limit for the task that is currently dirtying pages:
 *
 *	task_dirty_limit = dirty - dirty/8 * p_{task}
 *
 * but never below half of the original limit, so that tasks doing most of
 * the dirtying are throttled earlier than light dirtiers.
 */
static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
{
	long numerator, denominator;
	long dirty = *pdirty;
	u64 inv = dirty >> 3;

	task_dirties_fraction(tsk, &numerator, &denominator);
	inv *= numerator;
	do_div(inv, denominator);

	dirty -= inv;
	if (dirty < *pdirty/2)
		dirty = *pdirty/2;

	*pdirty = dirty;
}

/*
 * sysctl-settable per-BDI minimum/maximum share of the dirty limit.
 */
static DEFINE_SPINLOCK(bdi_lock);
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&bdi_lock, flags);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_irqrestore(&bdi_lock, flags);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	unsigned long flags;
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_irqsave(&bdi_lock, flags);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_irqrestore(&bdi_lock, flags);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);
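
/*
 * Work out the current dirty-memory clamping and background writeout
 * thresholds.
 *
 * The main aim here is to allow page dirtying to proceed at a reasonable
 * rate: enough dirty memory to make good use of batched writeout, but not
 * so much that page reclaim and allocators are starved of clean pages.
 * The thresholds scale with the amount of dirtyable memory computed below.
 */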
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z =
			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
	}
	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the total amount of dirtyable memory; this can only happen
	 * in very unusual VM configurations.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
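
/**
 * determine_dirtyable_memory - amount of memory that may be used
 *
 * Returns the number of pages that can currently be freed and used
 * by the kernel for direct mappings.
 */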
unsigned long determine_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

void
get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
{
	unsigned long background;
	unsigned long dirty;
	unsigned long available_memory = determine_dirtyable_memory();
	struct task_struct *tsk;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else {
		int dirty_ratio;

		dirty_ratio = vm_dirty_ratio;
		if (dirty_ratio < 5)
			dirty_ratio = 5;
		dirty = (dirty_ratio * available_memory) / 100;
	}

	if (dirty_background_bytes)
		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		background = (dirty_background_ratio * available_memory) / 100;

	if (background >= dirty)
		background = dirty / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;

	if (bdi) {
		u64 bdi_dirty;
		long numerator, denominator;

		/*
		 * Calculate this BDI's share of the dirty ratio.
		 */
		bdi_writeout_fraction(bdi, &numerator, &denominator);

		bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
		bdi_dirty *= numerator;
		do_div(bdi_dirty, denominator);
		bdi_dirty += (dirty * bdi->min_ratio) / 100;
		if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
			bdi_dirty = dirty * bdi->max_ratio / 100;

		*pbdi_dirty = bdi_dirty;
		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
		task_dirty_limit(current, pbdi_dirty);
	}
}
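
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then pdflush is woken to perform some
 * writeout.
 */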
static void balance_dirty_pages(struct address_space *mapping)
{
	long nr_reclaimable, bdi_nr_reclaimable;
	long nr_writeback, bdi_nr_writeback;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long pages_written = 0;
	unsigned long write_chunk = sync_writeback_pages();
	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		struct writeback_control wbc = {
			.bdi = bdi,
			.sync_mode = WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write = write_chunk,
			.range_cyclic = 1,
		};

		get_dirty_limits(&background_thresh, &dirty_thresh,
				&bdi_thresh, bdi);

		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);

		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;

		/*
		 * Throttle only when the global dirty state is well above
		 * the background threshold; below that, background
		 * writeback can be expected to catch up on its own.
		 */
		if (nr_reclaimable + nr_writeback <
				(background_thresh + dirty_thresh) / 2)
			break;

		if (!bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		/*
		 * Note: nr_reclaimable denotes nr_dirty + nr_unstable.
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		if (bdi_nr_reclaimable) {
			writeback_inodes(&wbc);
			pages_written += write_chunk - wbc.nr_to_write;
			get_dirty_limits(&background_thresh, &dirty_thresh,
				       &bdi_thresh, bdi);
		}

		/*
		 * In order to avoid the stacked BDI deadlock we need
		 * to ensure we accurately count the 'dirty' pages when
		 * the threshold is low.
		 *
		 * Otherwise it would be possible to get thresh+n pages
		 * reported dirty, even though there are thresh-m pages
		 * actually dirty; with m+n sitting in the percpu
		 * deltas.
		 */
		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
		} else if (bdi_nr_reclaimable) {
			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
		}

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;
		if (pages_written >= write_chunk)
			break;		/* We've done our duty */

		congestion_wait(WRITE, HZ/10);
	}

	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
			bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;		/* pdflush is already working this queue */

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if ((laptop_mode && pages_written) ||
			(!laptop_mode && (global_page_state(NR_FILE_DIRTY)
					+ global_page_state(NR_UNSTABLE_NFS)
					> background_thresh)))
		pdflush_operation(background_writeout, 0);
}

void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	if (set_page_dirty(page) || page_mkwrite) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}
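
/**
 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
 * @mapping: address_space which was dirtied
 * @nr_pages_dirtied: number of pages which the caller has just dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Checking the global counters is expensive on big machines, so it is only
 * done every ratelimit_pages dirtied pages per CPU; once over the dirty
 * limit the ratelimit is lowered drastically so that individual tasks
 * cannot overshoot the limit by much.
 */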
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
					unsigned long nr_pages_dirtied)
{
	static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
	unsigned long ratelimit;
	unsigned long *p;

	ratelimit = ratelimit_pages;
	if (mapping->backing_dev_info->dirty_exceeded)
		ratelimit = 8;

	/*
	 * Check the rate limiting: only drop into balance_dirty_pages()
	 * once this CPU has accumulated enough newly dirtied pages.
	 */
	preempt_disable();
	p = &__get_cpu_var(ratelimits);
	*p += nr_pages_dirtied;
	if (unlikely(*p >= ratelimit)) {
		*p = 0;
		preempt_enable();
		balance_dirty_pages(mapping);
		return;
	}
	preempt_enable();
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);

void throttle_vm_writeout(gfp_t gfp_mask)
{
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	for ( ; ; ) {
		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);

		/*
		 * Boost the allowable dirty threshold a bit for page
		 * allocators so they don't get DoS'ed by heavy writers.
		 */
		dirty_thresh += dirty_thresh / 10;

		if (global_page_state(NR_UNSTABLE_NFS) +
			global_page_state(NR_WRITEBACK) <= dirty_thresh)
			break;
		congestion_wait(WRITE, HZ/10);

		/*
		 * The caller might hold locks which can prevent IO completion
		 * or progress in the filesystem.  So we cannot just sit here
		 * waiting for IO to complete.
		 */
		if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
			break;
	}
}
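
/*
 * writeback at least _min_pages, and keep writing until the amount of dirty
 * memory is less than the background threshold.
 */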
static void background_writeout(unsigned long _min_pages)
{
	long min_pages = _min_pages;
	struct writeback_control wbc = {
		.bdi = NULL,
		.sync_mode = WB_SYNC_NONE,
		.older_than_this = NULL,
		.nr_to_write = 0,
		.nonblocking = 1,
		.range_cyclic = 1,
	};

	for ( ; ; ) {
		unsigned long background_thresh;
		unsigned long dirty_thresh;

		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
		if (global_page_state(NR_FILE_DIRTY) +
			global_page_state(NR_UNSTABLE_NFS) < background_thresh
				&& min_pages <= 0)
			break;
		wbc.more_io = 0;
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		wbc.pages_skipped = 0;
		writeback_inodes(&wbc);
		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
			/* Wrote less than expected */
			if (wbc.encountered_congestion || wbc.more_io)
				congestion_wait(WRITE, HZ/10);
			else
				break;
		}
	}
}
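
/*
 * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
 * the whole world.  Returns 0 if a pdflush thread was dispatched, or a
 * negative value if pdflush was unavailable.
 */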
int wakeup_pdflush(long nr_pages)
{
	if (nr_pages == 0)
		nr_pages = global_page_state(NR_FILE_DIRTY) +
				global_page_state(NR_UNSTABLE_NFS);
	return pdflush_operation(background_writeout, nr_pages);
}

#ifndef DDE_LINUX
static void wb_timer_fn(unsigned long unused);
static void laptop_timer_fn(unsigned long unused);

static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
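
/*
 * Periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark
 * the dirtying-time in the inode's address_space.  This periodic writeback
 * code walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write, so we will only write
 * back all dirty pages if they are all attached to "old" mappings.
 */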
static void wb_kupdate(unsigned long arg)
{
	unsigned long oldest_jif;
	unsigned long start_jif;
	unsigned long next_jif;
	long nr_to_write;
	struct writeback_control wbc = {
		.bdi = NULL,
		.sync_mode = WB_SYNC_NONE,
		.older_than_this = &oldest_jif,
		.nr_to_write = 0,
		.nonblocking = 1,
		.for_kupdate = 1,
		.range_cyclic = 1,
	};

	sync_supers();

	oldest_jif = jiffies - dirty_expire_interval;
	start_jif = jiffies;
	next_jif = start_jif + dirty_writeback_interval;
	nr_to_write = global_page_state(NR_FILE_DIRTY) +
			global_page_state(NR_UNSTABLE_NFS) +
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
	while (nr_to_write > 0) {
		wbc.more_io = 0;
		wbc.encountered_congestion = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		writeback_inodes(&wbc);
		if (wbc.nr_to_write > 0) {
			if (wbc.encountered_congestion || wbc.more_io)
				congestion_wait(WRITE, HZ/10);
			else
				break;	/* All the old data is written */
		}
		nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
	}
	if (time_before(next_jif, jiffies + HZ))
		next_jif = jiffies + HZ;
	if (dirty_writeback_interval)
		mod_timer(&wb_timer, next_jif);
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos);
	if (dirty_writeback_interval)
		mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
	else
		del_timer(&wb_timer);
	return 0;
}

static void wb_timer_fn(unsigned long unused)
{
	if (pdflush_operation(wb_kupdate, 0) < 0)
		mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
}

static void laptop_flush(unsigned long unused)
{
	sys_sync();
}

static void laptop_timer_fn(unsigned long unused)
{
	pdflush_operation(laptop_flush, 0);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(void)
{
	mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	del_timer(&laptop_mode_wb_timer);
}
#endif /* !DDE_LINUX */
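
/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will check the dirty counters far
 * too often.
 *
 * Here we set ratelimit_pages so that a CPU accumulates at most a small
 * number of dirty pages (capped at 4MB worth) before it rechecks the
 * global dirty state.
 */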
void writeback_set_ratelimit(void)
{
	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
	if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
		ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
}

static int __cpuinit
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
	writeback_set_ratelimit();
	return NOTIFY_DONE;
}

static struct notifier_block __cpuinitdata ratelimit_nb = {
	.notifier_call	= ratelimit_handler,
	.next		= NULL,
};
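
/*
 * Called early in boot to set the writeback ratelimit, arm the kupdate
 * timer (not in the DDE build) and initialise the dirty/writeout
 * proportion descriptors used above.
 */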
void __init page_writeback_init(void)
{
	int shift;

#ifndef DDE_LINUX
	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
#endif
	writeback_set_ratelimit();
	register_cpu_notifier(&ratelimit_nb);

	shift = calc_period_shift();
	prop_descriptor_init(&vm_completions, shift);
	prop_descriptor_init(&vm_dirties, shift);
}
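
/**
 * write_cache_pages - walk the list of dirty pages of the given address
 *                     space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().
 * fsync() and msync() need to guarantee that all the data which was dirty
 * at the time the call was made gets new I/O started against it.  If
 * wbc->sync_mode is WB_SYNC_ALL then we were called for data integrity and
 * must wait for existing IO to complete.
 */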
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	long nr_to_write = wbc->nr_to_write;

	if (wbc->nonblocking && bdi_write_congested(bdi)) {
		wbc->encountered_congestion = 1;
		return 0;
	}

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
retry:
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			      PAGECACHE_TAG_DIRTY,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping.  However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index + 1;

			lock_page(page);

			/*
			 * Page truncated or invalidated.  We can freely skip
			 * it then, even for data integrity operations: the
			 * page has disappeared concurrently, so there could
			 * be no real expectation of this data integrity
			 * operation even if there is now a new, dirty page at
			 * the same pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file.  This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done = 1;
					break;
				}
			}

			if (nr_to_write > 0) {
				nr_to_write--;
				if (nr_to_write == 0 &&
				    wbc->sync_mode == WB_SYNC_NONE) {
					/*
					 * We stop writing back only if we are
					 * not doing integrity sync.  In case
					 * of integrity sync we have to keep
					 * going because someone may be
					 * concurrently dirtying pages, and we
					 * might have synced a lot of newly
					 * appeared dirty pages, but have not
					 * synced all of the old dirty pages.
					 */
					done = 1;
					break;
				}
			}

			if (wbc->nonblocking && bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file.
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (!wbc->no_nrwrite_index_update) {
		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
			mapping->writeback_index = done_index;
		wbc->nr_to_write = nr_to_write;
	}

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);

#ifndef DDE_LINUX
/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error.
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/**
 * generic_writepages - walk the list of dirty pages of the given address
 *                      space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	return write_cache_pages(mapping, wbc, __writepage, mapping);
}

EXPORT_SYMBOL(generic_writepages);

int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	wbc->for_writepages = 1;
	if (mapping->a_ops->writepages)
		ret = mapping->a_ops->writepages(mapping, wbc);
	else
		ret = generic_writepages(mapping, wbc);
	wbc->for_writepages = 0;
	return ret;
}
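
/**
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */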
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		page_cache_get(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0 && wait) {
			wait_on_page_writeback(page);
			if (PageError(page))
				ret = -EIO;
		}
		page_cache_release(page);
	} else {
		unlock_page(page);
	}
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		SetPageDirty(page);
	return 0;
}
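
/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty
 * in its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set
 * the page dirty in that case, but not all the buffers.  This is a
 * "bottom-up" dirtying, whereas __set_page_dirty_buffers() is a "top-down"
 * dirtying.
 *
 * Most callers have locked the page, which pins the address_space in memory.
 * But zap_pte_range() does not lock the page; in that case the mapping is
 * pinned by the vma's ->vm_file reference.
 *
 * We take care to handle the case where the page was truncated from the
 * mapping by re-checking page_mapping() inside tree_lock.
 */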
int __set_page_dirty_nobuffers(struct page *page)
{
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		struct address_space *mapping2;

		if (!mapping)
			return 1;

		spin_lock_irq(&mapping->tree_lock);
		mapping2 = page_mapping(page);
		if (mapping2) { /* Race with truncate? */
			BUG_ON(mapping2 != mapping);
			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
			if (mapping_cap_account_dirty(mapping)) {
				__inc_zone_page_state(page, NR_FILE_DIRTY);
				__inc_bdi_stat(mapping->backing_dev_info,
						BDI_RECLAIMABLE);
				task_dirty_inc(current);
				task_io_account_write(PAGE_CACHE_SIZE);
			}
			radix_tree_tag_set(&mapping->page_tree,
				page_index(page), PAGECACHE_TAG_DIRTY);
		}
		spin_unlock_irq(&mapping->tree_lock);
		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and then unlock the page and return 0.
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	wbc->pages_skipped++;
	return __set_page_dirty_nobuffers(page);
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.  If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);
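
/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */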
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page_nosync(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);
#endif /* !DDE_LINUX */
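
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the radix tree so that a concurrent write-for-sync
 * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will run either set_page_writeback() or set_page_dirty(),
 * at which stage we bring the page's dirty flag and radix-tree dirty tag
 * back into sync.
 *
 * This incoherency between the page's dirty flag and radix-tree tag is
 * unfortunate, but it only exists while the page is locked.
 */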
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	BUG_ON(!PageLocked(page));

	ClearPageReclaim(page);
	if (mapping && mapping_cap_account_dirty(mapping)) {
		/*
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable, and only then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This avoids races on the dirty bit between
		 * multiple places clearing it concurrently:
		 * the page "master dirty bit" serves as the
		 * serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * Fault handlers are carefully synchronised against
		 * installing a dirty pte and marking the page dirty at
		 * this point: they hold the page lock at some point
		 * after installing their pte, but before marking the
		 * page dirty.  Pages are always locked coming in here,
		 * so we get the desired exclusion; see
		 * mm/memory.c:do_wp_page() for more comments.
		 */
		if (TestClearPageDirty(page)) {
			dec_zone_page_state(page, NR_FILE_DIRTY);
			dec_bdi_stat(mapping->backing_dev_info,
					BDI_RECLAIMABLE);
			return 1;
		}
		return 0;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				__dec_bdi_stat(bdi, BDI_WRITEBACK);
				__bdi_writeout_inc(bdi);
			}
		}
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret)
		dec_zone_page_state(page, NR_WRITEBACK);
	return ret;
}

#ifndef DDE_LINUX
int test_set_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (mapping) {
		struct backing_dev_info *bdi = mapping->backing_dev_info;
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			radix_tree_tag_set(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_bdi_stat(bdi, BDI_WRITEBACK);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret)
		inc_zone_page_state(page, NR_WRITEBACK);
	return ret;
}
EXPORT_SYMBOL(test_set_page_writeback);
#endif /* !DDE_LINUX */

/*
 * Return true if any of the pages in the mapping are marked with the
 * passed tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	int ret;
	rcu_read_lock();
	ret = radix_tree_tagged(&mapping->page_tree, tag);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(mapping_tagged);