Skip to content

Commit 24b109d

Browse files
committed
B #6772: Fix for NUMA and CPU Pinning Discrepancies During VM Save and Live Migration
Signed-off-by: Kristian Feldsam <feldsam@gmail.com>
1 parent ebc5b95 commit 24b109d

File tree

14 files changed

+239
-61
lines changed

14 files changed

+239
-61
lines changed

include/History.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,13 @@ class History:public ObjectSQL, public ObjectXML
114114
std::string deployment_file;
115115
std::string context_file;
116116
std::string token_file;
117+
std::string migrate_file;
117118

118119
// Remote paths
119120
std::string checkpoint_file;
120121
std::string rdeployment_file;
121122
std::string system_dir;
123+
std::string rmigrate_file;
122124

123125
/**
124126
* Writes the history record in the DB

include/VirtualMachine.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,19 @@ class VirtualMachine : public PoolObjectSQL
585585
return history->token_file;
586586
}
587587

588+
/**
589+
* Returns the local migrate filename. The migrate file is in the form:
590+
* $ONE_LOCATION/var/vms/$VM_ID/migrate.$SEQ
591+
* or, when OpenNebula is installed system-wide (in root):
592+
* /var/lib/one/vms/$VM_ID/migrate.$SEQ
593+
* hasHistory() MUST be called first: history is dereferenced without a check.
594+
* @return reference to the migrate file path kept in the history record
595+
*/
596+
const std::string & get_migrate_file() const
597+
{
598+
return history->migrate_file;
599+
};
600+
588601
/**
589602
* Returns the remote deployment filename. The file is in the form:
590603
* $DS_LOCATION/$SYSTEM_DS/$VM_ID/deployment.$SEQ
@@ -596,6 +609,17 @@ class VirtualMachine : public PoolObjectSQL
596609
return history->rdeployment_file;
597610
};
598611

612+
/**
613+
* Returns the remote migrate filename. The file is in the form:
614+
* $DS_LOCATION/$SYSTEM_DS/$VM_ID/migrate.$SEQ
615+
* hasHistory() MUST be called first: history is dereferenced without a check.
616+
* @return reference to the remote migrate file path kept in the history record
617+
*/
618+
const std::string & get_rmigrate_file() const
619+
{
620+
return history->rmigrate_file;
621+
};
622+
599623
/**
600624
* Returns the checkpoint filename for the current host. The checkpoint file
601625
* is in the form:

include/VirtualMachineManager.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,9 @@ class VirtualMachineManager :
376376
const std::string& tmpl,
377377
int ds_id,
378378
int sgid = -1,
379-
int nicid = -1);
379+
int nicid = -1,
380+
const std::string& lmfile = "",
381+
const std::string& rmfile = "");
380382

381383
public:
382384
/**

src/lcm/LifeCycleActions.cc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,11 @@ void LifeCycleManager::trigger_migrate(int vid, const RequestAttributes& ra,
350350

351351
if ( vm->get_hid() != vm->get_previous_hid() )
352352
{
353-
hpool->del_capacity(vm->get_previous_hid(), sr);
353+
HostShareCapacity prev_sr;
354+
Template tmpl;
355+
vm->get_previous_capacity(prev_sr, tmpl);
356+
357+
hpool->del_capacity(vm->get_previous_hid(), prev_sr);
354358

355359
vm->release_previous_vnc_port();
356360
}
@@ -1038,6 +1042,8 @@ void LifeCycleManager::clean_up_vm(VirtualMachine * vm, bool dispose,
10381042
int& image_id, int uid, int gid, int req_id, Template& quota_tmpl)
10391043
{
10401044
HostShareCapacity sr;
1045+
HostShareCapacity prev_sr;
1046+
Template tmpl;
10411047

10421048
time_t the_time = time(0);
10431049

@@ -1245,11 +1251,13 @@ void LifeCycleManager::clean_up_vm(VirtualMachine * vm, bool dispose,
12451251
case VirtualMachine::MIGRATE:
12461252
vm->set_running_etime(the_time);
12471253

1254+
vm->get_previous_capacity(prev_sr, tmpl);
1255+
12481256
vm->set_previous_etime(the_time);
12491257
vm->set_previous_vm_info();
12501258
vm->set_previous_running_etime(the_time);
12511259

1252-
hpool->del_capacity(vm->get_previous_hid(), sr);
1260+
hpool->del_capacity(vm->get_previous_hid(), prev_sr);
12531261

12541262
vmpool->update_previous_history(vm);
12551263

@@ -1268,11 +1276,13 @@ void LifeCycleManager::clean_up_vm(VirtualMachine * vm, bool dispose,
12681276
case VirtualMachine::SAVE_MIGRATE:
12691277
vm->set_running_etime(the_time);
12701278

1279+
vm->get_previous_capacity(prev_sr, tmpl);
1280+
12711281
vm->set_previous_etime(the_time);
12721282
vm->set_previous_vm_info();
12731283
vm->set_previous_running_etime(the_time);
12741284

1275-
hpool->del_capacity(vm->get_previous_hid(), sr);
1285+
hpool->del_capacity(vm->get_previous_hid(), prev_sr);
12761286

12771287
vmpool->update_previous_history(vm);
12781288

src/lcm/LifeCycleStates.cc

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,6 @@ void LifeCycleManager::start_prolog_migrate(VirtualMachine* vm)
4444

4545
vm->set_state(VirtualMachine::PROLOG_MIGRATE);
4646

47-
vm->set_previous_etime(the_time);
48-
49-
vm->set_previous_running_etime(the_time);
50-
51-
vmpool->update_previous_history(vm);
52-
53-
vm->set_prolog_stime(the_time);
54-
55-
vmpool->update_history(vm);
56-
5747
if ( vm->get_hid() != vm->get_previous_hid() )
5848
{
5949
Template tmpl;
@@ -64,6 +54,16 @@ void LifeCycleManager::start_prolog_migrate(VirtualMachine* vm)
6454
vm->release_previous_vnc_port();
6555
}
6656

57+
vm->set_previous_etime(the_time);
58+
59+
vm->set_previous_running_etime(the_time);
60+
61+
vmpool->update_previous_history(vm);
62+
63+
vm->set_prolog_stime(the_time);
64+
65+
vmpool->update_history(vm);
66+
6767
vmpool->update(vm);
6868

6969
//----------------------------------------------------
@@ -290,6 +290,9 @@ void LifeCycleManager::trigger_deploy_success(int vid)
290290

291291
vm->set_running_stime(the_time);
292292

293+
Template tmpl;
294+
vm->get_previous_capacity(sr, tmpl);
295+
293296
vmpool->update_history(vm.get());
294297

295298
vm->set_previous_etime(the_time);
@@ -298,8 +301,6 @@ void LifeCycleManager::trigger_deploy_success(int vid)
298301

299302
vmpool->update_previous_history(vm.get());
300303

301-
vm->get_capacity(sr);
302-
303304
hpool->del_capacity(vm->get_previous_hid(), sr);
304305

305306
vm->set_state(VirtualMachine::RUNNING);

src/rm/RequestManagerVirtualMachine.cc

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,14 +1160,6 @@ void VirtualMachineMigrate::request_execute(xmlrpc_c::paramList const& paramList
11601160
return;
11611161
}
11621162

1163-
if (live && vm->is_pinned())
1164-
{
1165-
att.resp_msg = "VM with a pinned NUMA topology cannot be live-migrated";
1166-
failure_response(ACTION, att);
1167-
1168-
return;
1169-
}
1170-
11711163
// Get System DS information from current History record
11721164
c_ds_id = vm->get_ds_id();
11731165
c_tm_mad = vm->get_tm_mad();

src/vm/History.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ void History::non_persistent_data()
128128

129129
token_file = os.str();
130130

131+
os.str("");
132+
os << vm_lhome << "/migrate." << seq;
133+
134+
migrate_file = os.str();
135+
131136
// ----------- Remote Locations ------------
132137
os.str("");
133138
os << ds_location << "/" << ds_id << "/" << oid;
@@ -141,6 +146,11 @@ void History::non_persistent_data()
141146
os << system_dir << "/deployment." << seq;
142147

143148
rdeployment_file = os.str();
149+
150+
os.str("");
151+
os << system_dir << "/migrate." << seq;
152+
153+
rmigrate_file = os.str();
144154
}
145155

146156
/* -------------------------------------------------------------------------- */

src/vmm/LibVirtDriverKVM.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2298,5 +2298,7 @@ int LibVirtDriver::deployment_description_kvm(
22982298

22992299
file << "</domain>" << endl;
23002300

2301+
file.close();
2302+
23012303
return 0;
23022304
}

src/vmm/VirtualMachineManager.cc

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,9 @@ string VirtualMachineManager::format_message(
221221
const string& tmpl,
222222
int ds_id,
223223
int sgid,
224-
int nicid)
224+
int nicid,
225+
const string& lmfile,
226+
const string& rmfile)
225227
{
226228
ostringstream oss;
227229

@@ -266,6 +268,17 @@ string VirtualMachineManager::format_message(
266268
oss << "<REMOTE_DEPLOYMENT_FILE/>";
267269
}
268270

271+
if (!lmfile.empty())
272+
{
273+
oss << "<LOCAL_MIGRATE_FILE>" << lmfile << "</LOCAL_MIGRATE_FILE>";
274+
oss << "<REMOTE_MIGRATE_FILE>" << rmfile << "</REMOTE_MIGRATE_FILE>";
275+
}
276+
else
277+
{
278+
oss << "<LOCAL_MIGRATE_FILE/>";
279+
oss << "<REMOTE_MIGRATE_FILE/>";
280+
}
281+
269282
if (!cfile.empty())
270283
{
271284
oss << "<CHECKPOINT_FILE>" << cfile << "</CHECKPOINT_FILE>";
@@ -495,8 +508,9 @@ void VirtualMachineManager::trigger_save(int vid)
495508
trigger([this, vid]
496509
{
497510
const VirtualMachineManagerDriver * vmd;
511+
int rc;
498512

499-
string hostname, checkpoint_file;
513+
string hostname, checkpoint_file, migrate_file, rmigrate_file;
500514
string vm_tmpl;
501515
string drv_msg;
502516
int ds_id;
@@ -535,12 +549,31 @@ void VirtualMachineManager::trigger_save(int vid)
535549
hostname = vm->get_previous_hostname();
536550
checkpoint_file = vm->get_previous_checkpoint_file();
537551
ds_id = vm->get_previous_ds_id();
552+
553+
//Generate VM description file
554+
os << "Generating migrate file: " << vm->get_migrate_file();
555+
556+
vm->log("VMM", Log::INFO, os);
557+
558+
os.str("");
559+
560+
rc = vmd->deployment_description(vm.get(), vm->get_migrate_file());
561+
562+
if (rc != 0)
563+
{
564+
goto error_file;
565+
}
566+
567+
migrate_file = vm->get_migrate_file();
568+
rmigrate_file = vm->get_rmigrate_file();
538569
}
539570
else
540571
{
541572
hostname = vm->get_hostname();
542573
checkpoint_file = vm->get_checkpoint_file();
543574
ds_id = vm->get_ds_id();
575+
migrate_file = "";
576+
rmigrate_file = "";
544577
}
545578

546579
// Invoke driver method
@@ -556,7 +589,10 @@ void VirtualMachineManager::trigger_save(int vid)
556589
"",
557590
vm->to_xml(vm_tmpl),
558591
ds_id,
559-
-1);
592+
-1,
593+
-1,
594+
migrate_file,
595+
rmigrate_file);
560596

561597
vmd->save(vid, drv_msg);
562598

@@ -570,6 +606,11 @@ void VirtualMachineManager::trigger_save(int vid)
570606
os << "save_action, error getting driver " << vm->get_vmm_mad();
571607
goto error_common;
572608

609+
error_file:
610+
os << "save_action, error generating migrate file: "
611+
<< vm->get_migrate_file();
612+
goto error_common;
613+
573614
error_previous_history:
574615
os << "save_action, VM has no previous history";
575616

@@ -1154,10 +1195,12 @@ void VirtualMachineManager::trigger_migrate(int vid)
11541195
trigger([this, vid]
11551196
{
11561197
const VirtualMachineManagerDriver * vmd;
1198+
int rc;
11571199

11581200
ostringstream os;
11591201
string vm_tmpl;
11601202
string drv_msg;
1203+
string tm_command = "";
11611204

11621205
// Get the VM from the pool
11631206
auto vm = vmpool->get(vid);
@@ -1187,6 +1230,24 @@ void VirtualMachineManager::trigger_migrate(int vid)
11871230

11881231
Nebula::instance().get_tm()->migrate_transfer_command(vm.get(), os);
11891232

1233+
tm_command = os.str();
1234+
1235+
os.str("");
1236+
1237+
//Generate VM description file
1238+
os << "Generating migrate file: " << vm->get_migrate_file();
1239+
1240+
vm->log("VMM", Log::INFO, os);
1241+
1242+
os.str("");
1243+
1244+
rc = vmd->deployment_description(vm.get(), vm->get_migrate_file());
1245+
1246+
if (rc != 0)
1247+
{
1248+
goto error_file;
1249+
}
1250+
11901251
// Invoke driver method
11911252
drv_msg = format_message(
11921253
vm->get_previous_hostname(),
@@ -1195,12 +1256,15 @@ void VirtualMachineManager::trigger_migrate(int vid)
11951256
"",
11961257
"",
11971258
"",
1198-
os.str(),
1259+
tm_command,
11991260
"",
12001261
vm->get_system_dir(),
12011262
vm->to_xml(vm_tmpl),
12021263
vm->get_previous_ds_id(),
1203-
-1);
1264+
-1,
1265+
-1,
1266+
vm->get_migrate_file(),
1267+
vm->get_rmigrate_file());
12041268

12051269
vmd->migrate(vid, drv_msg);
12061270

@@ -1214,6 +1278,11 @@ void VirtualMachineManager::trigger_migrate(int vid)
12141278
os << "migrate_action, error getting driver " << vm->get_vmm_mad();
12151279
goto error_common;
12161280

1281+
error_file:
1282+
os << "migrate_action, error generating migrate file: "
1283+
<< vm->get_migrate_file();
1284+
goto error_common;
1285+
12171286
error_previous_history:
12181287
os << "migrate_action, error VM has no previous history";
12191288

0 commit comments

Comments
 (0)