1: <?php
2: declare(strict_types=1);
3: /**
4: * +------------------------------------------------------------+
5: * | apnscp |
6: * +------------------------------------------------------------+
7: * | Copyright (c) Apis Networks |
8: * +------------------------------------------------------------+
9: * | Licensed under Artistic License 2.0 |
10: * +------------------------------------------------------------+
11: * | Author: Matt Saladna (msaladna@apisnetworks.com) |
12: * +------------------------------------------------------------+
13: */
14:
15: use Daphnie\Collector;
16: use Daphnie\Contracts\MetricProvider;
17: use Daphnie\Metric;
18: use Module\Skeleton\Contracts\Hookable;
19: use Module\Skeleton\Contracts\Tasking;
20: use Opcenter\Provisioning\Cgroup as CgroupProvisioning;
21: use Opcenter\SiteConfiguration;
22: use Opcenter\System\Cgroup;
23:
24: /**
25: * Control group interfacing
26: *
27: * @package core
28: */
29: class Cgroup_Module extends Module_Skeleton implements Hookable, Tasking
30: {
31: const CGROUP_LOCATION = Cgroup::CGROUP_HOME;
32: const DEPENDENCY_MAP = [
33: 'siteinfo',
34: 'diskquota'
35: ];
36: const DEFAULT_MEMORY = 512;
37: const DEFAULT_CPU = 10240;
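/** fallback process limit (a count); NOTE(review): the former "in MB" remark presumably belonged to DEFAULT_MEMORY - confirm */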
39: const MAX_PROCS = 25;
40:
41: const METRIC_ATTR_CPU_USAGE = [
42: 'c-cpuacct-usage',
43: 'c-cpuacct-system',
44: 'c-cpuacct-user'
45: ];
46:
47: protected $exportedFunctions = [
48: '*' => PRIVILEGE_SITE | PRIVILEGE_USER | PRIVILEGE_ADMIN,
49: 'reset_peak_memory' => PRIVILEGE_SITE | PRIVILEGE_ADMIN,
50: 'frozen' => PRIVILEGE_ADMIN,
51: 'thaw' => PRIVILEGE_ADMIN,
52: 'freeze' => PRIVILEGE_ADMIN
53: ];
54:
/**
 * Report usage for a single cgroup controller
 *
 * Outside of CLI with a non-zero uid the call is handed off through
 * query(). Otherwise the controller name is checked against
 * get_controllers() and dispatched to the matching
 * _get_<controller>_usage() collector.
 *
 * @param string $controller controller name
 * @return array|false usage data, empty array when no collector is
 *                     implemented for the controller, or the result of
 *                     error() when the controller is unknown
 */
public function get_usage(string $controller)
{
    if (!IS_CLI && posix_getuid()) {
        return $this->query('cgroup_get_usage', $controller);
    }

    $known = $this->get_controllers();
    if (!in_array($controller, $known, true)) {
        return error("unknown controller `%s'", $controller);
    }

    // collectors follow the _get_<controller>_usage naming scheme
    $collector = "_get_{$controller}_usage";

    // no collector means this controller exposes no usage data
    return method_exists($this, $collector) ? $this->$collector() : [];
}
78:
/**
 * List the cgroup controllers configured for this system
 *
 * @return string[] value of the CGROUP_CONTROLLERS constant
 */
public function get_controllers(): array
{
    $controllers = CGROUP_CONTROLLERS;

    return $controllers;
}
88:
/**
 * Resolve the cgroup name for the active session
 *
 * @return string|null string form of the site's group for site and user
 *                     roles, null for every other role
 */
public function get_cgroup(): ?string
{
    $scoped = $this->permission_level & (PRIVILEGE_SITE | PRIVILEGE_USER);
    if (!$scoped) {
        return null;
    }

    $group = new Cgroup\Group($this->site);

    return (string)$group;
}
102:
/**
 * Get configured limits
 *
 * Reads the whole "cgroup" service block and strips the bookkeeping
 * keys ("version", "enabled") so only limit values remain.
 *
 * @return array configured limits, empty when the service is absent
 *               or not enabled
 */
public function get_limits(): array
{
    $limits = $this->getServiceValue('cgroup');
    // a missing service block or absent "enabled" key is treated as
    // disabled instead of dereferencing an offset on a non-array value
    if (!is_array($limits) || empty($limits['enabled'])) {
        return [];
    }

    return array_except($limits, ['version', 'enabled']);
}
117:
/**
 * Reset the memory controller of the current site's group
 *
 * Outside of CLI the request is forwarded through query(); in CLI the
 * memory controller is built for the site's group and reset directly.
 */
public function reset_peak_memory(): void
{
    if (IS_CLI) {
        $group = new \Opcenter\System\Cgroup\Group($this->site);
        Cgroup\Controllers\Memory::make($group, null)->reset();

        return;
    }

    $this->query('cgroup_reset_peak_memory');
}
135:
/**
 * Whether the "cgroup" service is flagged as enabled
 *
 * @return bool boolean cast of the service's "enabled" value
 */
public function enabled(): bool
{
    $flag = $this->getServiceValue('cgroup', 'enabled');

    return (bool)$flag;
}
145:
/**
 * Configuration verification hook
 *
 * No validation is performed; every configuration is accepted.
 *
 * @param \Opcenter\Service\ConfigurationContext $ctx unused
 * @return bool always true
 */
public function _verify_conf(\Opcenter\Service\ConfigurationContext $ctx): bool
{
    return true;
}
150:
/**
 * Creation hook, performs no work
 */
public function _create()
{
}

/**
 * Deletion hook, performs no work
 */
public function _delete()
{
}

/**
 * Edit hook, performs no work
 */
public function _edit()
{
}
159:
/**
 * User creation hook, performs no work
 *
 * @param string $user unused
 * @return bool always true
 */
public function _create_user(string $user)
{
    return true;
}

/**
 * User deletion hook, performs no work
 *
 * @param string $user unused
 * @return bool always true
 */
public function _delete_user(string $user)
{
    return true;
}

/**
 * User edit hook, performs no work
 *
 * @param string $userold unused
 * @param string $usernew unused
 * @param array  $oldpwd  unused
 * @return bool always true
 */
public function _edit_user(string $userold, string $usernew, array $oldpwd)
{
    return true;
}
174:
/**
 * Housekeeping pass over every site's cgroup controllers
 *
 * For each enumerated site that has an admin, existing controllers are
 * reset and absent ones are imported from the site context, created and
 * added to the group. When any controller had to be added, the
 * controller configuration is regenerated for the site.
 *
 * @return bool|null null when no controllers are configured, the result
 *                   of error() when the cgroup hierarchy is not mounted,
 *                   true otherwise
 */
public function _housekeeping()
{
    // the first configured controller doubles as the mount probe
    $probe = $this->get_controllers()[0] ?? null;
    if (!$probe) {
        return;
    }
    if (!Cgroup::mounted($probe)) {
        return error("%s not mounted", FILESYSTEM_SHARED . "/cgroup");
    }

    // NOTE(review): the return value is never used below; call retained as-is
    $this->web_get_sys_user();

    foreach (\Opcenter\Account\Enumerate::sites() as $site) {
        $siteId = (int)substr($site, 4);
        if (!Auth::get_admin_from_site_id($siteId)) {
            continue;
        }

        $permissions = [
            'task' => [
                'uid'   => 'root',
                'gid'   => \Auth::get_group_from_site($site),
                'fperm' => 0660
            ]
        ];
        $group = new \Opcenter\System\Cgroup\Group($site, $permissions);

        // context is resolved lazily, only once a controller is found missing
        $ctx = null;
        foreach (Cgroup::getControllers() as $name) {
            $controller = \Opcenter\System\Cgroup\Controller::make($group, $name, []);
            if (Cgroup::exists($controller)) {
                \Error_Reporter::filter(static fn() => $controller->reset(), [':msg_cgroup_feature_unsupported']);
                continue;
            }

            if (null === $ctx) {
                $ctx = \Auth::nullableContext(null, $site);
                if (null === $ctx) {
                    // no usable context, give up on this site entirely
                    continue 2;
                }
            }

            $controller->import($ctx);
            if (!$controller->getAttributes()) {
                continue;
            }
            $controller->create();
            $group->add($controller);
        }

        $missed = $group->getControllers();
        if ($missed) {
            $names = array_map(static fn($c) => $c->getName(), $missed);
            report("Missed controller %(controller)s on %(group)s",
                ['controller' => implode(', ', $names), 'group' => $group]);
            // missed a controller, do a full import
            CgroupProvisioning::createControllerConfiguration(SiteConfiguration::shallow($ctx));
        }
    }

    return true;
}
228:
/**
 * Get controller memory usage
 *
 * Admin sessions, and groups that report no limit, are measured against
 * system memory; all others are capped at the smaller of the group
 * limit and total system memory.
 *
 * @return array memory statistics with "limit" and "free" populated
 */
private function _get_memory_usage(): array
{
    $stats = Cgroup::memory_usage($this->get_cgroup());
    $sysMemory = \Opcenter\System\Memory::stats();
    // NOTE(review): memtotal/memavailable presumably in KB, scaled to bytes - confirm
    $maxMemory = $sysMemory['memtotal'] * 1024;

    // isset() also covers a "limit" key that is absent altogether
    if (($this->permission_level & PRIVILEGE_ADMIN) || !isset($stats['limit'])) {
        $stats['limit'] = $maxMemory;
        $stats['free'] = $sysMemory['memavailable'] * 1024;

        return $stats;
    }

    $stats['limit'] = min($stats['limit'], $maxMemory);
    // a missing "used" figure counts as zero consumption
    $stats['free'] = $stats['limit'] - ($stats['used'] ?? 0);

    return $stats;
}
250:
/**
 * Backfill usage entries from a set of defaults
 *
 * A default is applied whenever the usage entry is absent or null;
 * entries that are already set are left untouched.
 *
 * @param array $usage    usage as reported by the controller
 * @param array $defaults fallback values keyed like $usage
 * @return array usage with gaps filled in
 */
private function _fillUsage(array $usage, array $defaults): array
{
    foreach ($defaults as $key => $fallback) {
        $usage[$key] ??= $fallback;
    }

    return $usage;
}
268:
/**
 * cpuacct usage collector
 *
 * @return array always empty, no data is reported for this controller
 */
private function _get_cpuacct_usage(): array
{
    $usage = [];

    return $usage;
}
273:
/**
 * pids usage collector
 *
 * @return array pid usage with "max" backfilled from the configured
 *               "proclimit" (999 for admin, MAX_PROCS otherwise when
 *               unset)
 */
private function _get_pids_usage(): array
{
    // @todo replace CPU maxproc with pids subsystem
    $isAdmin = (bool)($this->permission_level & PRIVILEGE_ADMIN);
    $fallback = $isAdmin ? 999 : self::MAX_PROCS;

    $usage = Cgroup::pid_usage($this->get_cgroup());
    $defaults = [
        'max' => $this->getServiceValue('cgroup', 'proclimit', $fallback)
    ];

    return $this->_fillUsage($usage, $defaults);
}
289:
/**
 * cpu usage collector
 *
 * Site sessions with telemetry enabled report the recorded 24 hour
 * sums; every other case scales the controller's reported usage to a
 * 24 hour window based on the age of the cpuacct controller path.
 *
 * @return array cpu usage with "limit", "maxprocs", "cumusage" and
 *               "free" backfilled
 */
private function _get_cpu_usage(): array
{
    $isAdmin = (bool)($this->permission_level & PRIVILEGE_ADMIN);
    $maxcpu = $isAdmin ? NPROC * 86400 : self::DEFAULT_CPU;
    $maxprocs = $isAdmin ? 999 : self::MAX_PROCS;

    $usage = Cgroup::cpu_usage($this->get_cgroup());
    $useTelemetry = ($this->permission_level & PRIVILEGE_SITE) && TELEMETRY_ENABLED;

    if (!$useTelemetry) {
        // note: poor approximation for uniform usage
        debug("Telemetry disabled. Approximating CPU usage for %s", $this->site);
        $cpuacct = Cgroup\Controllers\Cpuacct::make(new Cgroup\Group($this->site), null);
        $created = filectime($cpuacct->getPath());
        $elapsed = microtime(true) - $created;
        $usage['used'] = $usage['used'] / $elapsed * 86400;
    } else {
        $sum = $this->telemetry_range(self::METRIC_ATTR_CPU_USAGE, time() - 86400, null, $this->site_id, true);
        /**
         * > .usage is measuring the wall clock nanoseconds whereas .stat is measuring the cpu cycles consumed.
         * http://mail-archives.apache.org/mod_mbox/mesos-dev/201302.mbox/%3C20130214015558.21380.50889@reviews.apache.org%3E
         */
        // convert centiseconds to seconds
        $seconds = static fn(string $metric) => ($sum[$metric] ?? 0) / 100;

        $windowUsage = $seconds('c-cpuacct-usage');
        // keep the controller's own figure when no usage was recorded
        $usage['cumusage'] = $windowUsage ?: $usage['used'];
        $usage['used'] = $windowUsage;
        // controller-reported values move to cum*, the recorded sums take their place
        $usage['cumuser'] = $usage['user'];
        $usage['cumsystem'] = $usage['system'];
        $usage['system'] = $seconds('c-cpuacct-system');
        $usage['user'] = $seconds('c-cpuacct-user');
    }

    $cpuLimit = $this->getServiceValue('cgroup', 'cpu', $maxcpu);
    $defaults = [
        'limit'    => $cpuLimit,
        'maxprocs' => $this->getServiceValue('cgroup', 'proclimit', $maxprocs),
        'cumusage' => $usage['used'],
        'free'     => $cpuLimit - $usage['used']
    ];

    return $this->_fillUsage($usage, $defaults);
}
333:
/**
 * io usage collector, delegates to the blkio collector
 *
 * @return array see _get_blkio_usage()
 */
private function _get_io_usage(): array
{
    $usage = $this->_get_blkio_usage();

    return $usage;
}
338:
/**
 * blkio usage collector
 *
 * @return array io usage with IOPS and bandwidth entries backfilled
 *               from the "cgroup" service (100 when unset)
 */
private function _get_blkio_usage(): array
{
    $usage = Cgroup::io_usage($this->get_cgroup());

    // usage key => "cgroup" service parameter supplying its fallback
    $serviceKeys = [
        'iops-read'  => 'readiops',
        'iops-write' => 'writeiops',
        'bw-read'    => 'readbw',
        'bw-write'   => 'writebw'
    ];

    $defaults = [];
    foreach ($serviceKeys as $usageKey => $serviceKey) {
        $defaults[$usageKey] = $this->getServiceValue('cgroup', $serviceKey, 100);
    }

    return $this->_fillUsage($usage, $defaults);
}
351:
/**
 * Convert site from thawed to frozen state
 *
 * Every site matched by $spec is processed. A site without a freezer
 * controller is reported with a warning, counted as a failure and
 * skipped.
 *
 * @param string $spec site, site id, domain, invoice or any matchable identifier
 * @return bool true only when every matched site was frozen
 */
public function freeze(string $spec): bool
{
    if (!IS_CLI) {
        return $this->query('cgroup_freeze', $spec);
    }

    if (Cgroup::version() === 1 && !in_array('freezer', Cgroup::getControllers(), true)) {
        return error("%s cgroup must be enabled", 'freezer');
    }

    $sites = (array)Auth::get_site_id_from_anything($spec);
    if (!$sites) {
        return error("Unknown site spec `%s'", $spec);
    }

    $frozen = true;
    foreach ($sites as $siteid) {
        $controller = Opcenter\System\Cgroup\Controllers\Freezer::make(
            new \Opcenter\System\Cgroup\Group("site{$siteid}"),
            null
        );

        if (!$controller->exists()) {
            warn("Controller `%(controller)s' missing for `%(site)s'", ['controller' => 'freezer', 'site' => "site{$siteid}"]);
            $frozen = false;
            // nothing to activate on a missing controller, same as thaw()/frozen()
            continue;
        }

        // activate() is evaluated first so an earlier failure never
        // short-circuits the freeze of the remaining sites
        $frozen = $controller->createAttribute('state', Cgroup\Attributes\Freezer\State::STATE_FROZEN)->activate()
            && $frozen;
    }

    return $frozen;
}
390:
/**
 * Convert site from frozen to thawed state
 *
 * Sites without a freezer controller are silently skipped.
 *
 * @param string $spec site, site id, domain, invoice or any matchable identifier
 * @return bool accumulated result of every activation
 */
public function thaw(string $spec): bool
{
    if (!IS_CLI) {
        return $this->query('cgroup_thaw', $spec);
    }

    $freezerMissing = !in_array('freezer', Cgroup::getControllers(), true);
    if (Cgroup::version() === 1 && $freezerMissing) {
        return error("%s cgroup must be enabled", 'freezer');
    }

    $sites = (array)Auth::get_site_id_from_anything($spec);
    if (!$sites) {
        return error("Unknown site spec `%s'", $spec);
    }

    $thawed = true;
    foreach ($sites as $siteid) {
        $group = new \Opcenter\System\Cgroup\Group("site{$siteid}");
        $freezer = Opcenter\System\Cgroup\Controllers\Freezer::make($group, null);

        if ($freezer->exists()) {
            $state = $freezer->createAttribute('state', Cgroup\Attributes\Freezer\State::STATE_THAWED);
            // bitwise accumulation, cast back to bool on return
            $thawed = $thawed & $state->activate();
        }
    }

    return (bool)$thawed;
}
429:
/**
 * Site is in cgroup frozen state
 *
 * A warning is raised when $spec matches more than one site; the
 * result then covers all of them. A site without a freezer controller
 * sets the running result to false.
 *
 * @param string $spec site, site id, domain, invoice or any matchable identifier
 * @return bool
 */
public function frozen(string $spec): bool
{
    if (!IS_CLI) {
        return $this->query('cgroup_frozen', $spec);
    }

    $freezerMissing = !in_array('freezer', Cgroup::getControllers(), true);
    if (Cgroup::version() === 1 && $freezerMissing) {
        return error("%s cgroup must be enabled", 'freezer');
    }

    $sites = (array)Auth::get_site_id_from_anything($spec);
    if (!$sites) {
        return error("Unknown site spec `%s'", $spec);
    }
    if (count($sites) > 1) {
        warn("Multiple sites queried with site spec `%s'", $spec);
    }

    $frozen = true;
    foreach ($sites as $siteid) {
        $group = new \Opcenter\System\Cgroup\Group("site{$siteid}");
        $freezer = Opcenter\System\Cgroup\Controllers\Freezer::make($group, null);

        if ($freezer->exists()) {
            // bitwise accumulation, cast back to bool on return
            $frozen = $frozen & $freezer->createAttribute('state', null)->frozen();
        } else {
            $frozen = false;
        }
    }

    return (bool)$frozen;
}
472:
/**
 * cgroup version as reported by Cgroup::version()
 *
 * @return int
 */
public function version(): int
{
    $version = Cgroup::version();

    return (int)$version;
}
477:
/**
 * Periodic tasks: peak memory reset and metric collection
 *
 * When CGROUP_RESET_PEAK is positive a job is scheduled that resets the
 * memory controller of every site. When telemetry is enabled, loggable
 * metrics of every configured controller are read for each site plus
 * the system-wide group and handed to the collector. A controller is
 * reset once one of its monotonic metrics exceeds
 * Metric::FIELD_MAX_VALUE.
 *
 * @param Cronus $cron
 * @return bool|null null when telemetry is disabled, true otherwise
 */
public function _cron(Cronus $cron) {

    if (CGROUP_RESET_PEAK > 0) {
        $cron->schedule(CGROUP_RESET_PEAK, 'reset.max-memory', static function () {
            $db = PostgreSQL::pdo();
            $sites = (new \Opcenter\Database\PostgreSQL\Opcenter($db))->readSitesFromSiteinfo();
            foreach (array_keys($sites) as $id) {
                $group = new Cgroup\Group("site{$id}");
                $controller = Cgroup\Controller::make($group, 'memory');
                if ($controller->exists()) {
                    $controller->reset();
                }
            }
        });
    }

    if (!TELEMETRY_ENABLED) {
        return;
    }

    $db = PostgreSQL::pdo();
    $collector = new Collector($db);
    // read from siteinfo table to guard against failed foreign key checks
    $sites = (new \Opcenter\Database\PostgreSQL\Opcenter($db))->readSitesFromSiteinfo();
    // Site ids are the array keys. The system entry must be appended to the
    // id list itself: appending null to $sites would only create a new
    // integer key that is then mistaken for a site id.
    $targets = array_keys($sites);
    $targets[] = null; // system controller
    $controllers = $this->get_controllers();
    foreach ($targets as $id) {
        // NOTE(review): assumes Cgroup\Group accepts null for the system-wide
        // group, as other methods of this class pass a null site - confirm
        $groupName = null === $id ? null : "site{$id}";
        $siteId = null === $id ? null : ((int)$id ?: null);
        $ts = time();
        /**
         * Approx 32k controllers/sec on testing VM (~5500 backend req/sec)
         * This method should be fine with minimal performance degradation,
         * may wish to switch to less OO approach in the future if bottlenecks appear
         *
         * Takes ~5ms to log all metrics for a site
         */
        $group = new Cgroup\Group($groupName);
        foreach ($controllers as $c) {
            $controller = Cgroup\Controller::make($group, $c);
            $attrs = (new Cgroup\MetricsLogging($controller))->getLoggableAttributes();
            $metrics = $controller->readMetrics(array_keys($attrs));
            $reset = false;
            foreach ($metrics as $k => $v) {
                if (!$reset && ((int)$v > Metric::FIELD_MAX_VALUE &&
                    $controller->getMetricDataType($k) === MetricProvider::TYPE_MONOTONIC)) {
                    // reconstitute metric type from "anonymous" metric via db. Registration method
                    // would hold onto a controller instance assigned to a specific site indefinitely
                    $reset = true;
                }
                $collector->add($attrs[$k], $siteId, (int)$v, $ts);
            }

            if ($reset) {
                $controller->reset();
            }
        }
    }
    // drop the collector reference; presumably flushes pending metrics on destruct - confirm
    $collector = null;

    return true;
}
542: }