[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-API] [PATCH 2 of 2] [database]: make the database upgrade code more robust



# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1257263472 0
# Node ID 9b07adfbf56554b155f75490f602c37277ec24e0
# Parent  0baec4029c0a9f2b15c40dc912ce3f4b82a09740
[database]: make the database upgrade code more robust

We always 'upgrade' the database on xapi start, irrespective of schema versions.
 -- it's no longer a catastrophic error to forget to bump the schema version 
when adding a field or a table: these updates are now automatic
 -- it's no longer a catastrophic error to bump the schema version and not have 
the exact schema version in the upgrade check

We now have a list of upgrade rules where each rule applies to schema versions 
<= some threshold. Any applicable rules are always applied.
 -- the rules can now reference product versions so we can keep track of them
 -- we can have rules for multiple product versions in one list and apply the 
correct sequence for both multi-step and single-step upgrades
 -- rule application failures are logged but do not cause xapi to exit

After 'upgrade' we don't restart xapi
 -- the database cache has the right contents for startup to proceed
 -- we unconditionally force-flush every database backend so the upgrade should 
be irreversible
 -- after a real host upgrade the backup database will be on the backup 
partition anyway: no need to do a little dance to keep a '.prev-version' file

On failure, continue to start xapi
 -- If the binary's schema is *older* than the database's, we log an error and 
continue; this is probably safe (similar to being a slave in a rolling upgrade)
 -- We never throw a schema mismatch exception: this was always a horrible 
failure mode, both for developers and users, requiring manual database XML 
hacking.

Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>

diff -r 0baec4029c0a -r 9b07adfbf565 ocaml/database/db_cache.ml
--- a/ocaml/database/db_cache.ml        Tue Nov 03 15:51:10 2009 +0000
+++ b/ocaml/database/db_cache.ml        Tue Nov 03 15:51:12 2009 +0000
@@ -494,26 +494,29 @@
        
       Db_connections.init_gen_count connections;
       (* If we have a temporary_restore_path (backup uploaded in previous run 
of xapi process) then restore from that *)
-      if Sys.file_exists Xapi_globs.db_temporary_restore_path then
-               begin
-                 (* we know that the backup is XML format so, to get the 
manifest, we jump right in and use the xml backend directly here.. *)
-                 let manifest = Backend_xml.populate_and_read_manifest 
Parse_db_conf.backup_file_dbconn in
-                 Db_backend.post_restore_hook manifest;
-                 (* delete file that contained backup *)
-                 Db_backend.try_and_delete_db_file 
Xapi_globs.db_temporary_restore_path;
-               end
+      let db = 
+       if Sys.file_exists Xapi_globs.db_temporary_restore_path then begin
+         (* we know that the backup is XML format so, to get the manifest, we 
jump right in and use the xml backend directly here.. *)
+         let manifest = Backend_xml.populate_and_read_manifest 
Parse_db_conf.backup_file_dbconn in
+         Db_backend.post_restore_hook manifest;
+         (* delete file that contained backup *)
+         Db_backend.try_and_delete_db_file 
Xapi_globs.db_temporary_restore_path;
+         Parse_db_conf.backup_file_dbconn
+       end
       else (* if there's no backup to restore from then.. *)
-               begin
-                 (* Check schema vsn is current; if not try and upgrade; if 
can't do that then fail startup.. *)
-                 let most_recent_db = Db_connections.pick_most_recent_db 
connections in
-                 (* populate gets all field names from the existing (old) db 
file, not the (current) schema... which is nice: *)
-                 Backend_xml.populate most_recent_db;
+       begin
+         (* Check schema vsn is current; if not try and upgrade; if can't do 
that then fail startup.. *)
+         let most_recent_db = Db_connections.pick_most_recent_db connections in
+         (* populate gets all field names from the existing (old) db file, not 
the (current) schema... which is nice: *)
+         Backend_xml.populate most_recent_db;
+         most_recent_db
+       end in
+      (* Always perform the generic database upgrade stuff *)
+      Db_upgrade.generic_database_upgrade ();
 
-                 debug "Path that I'm looking at to consider whether to 
upgrade = %s" most_recent_db.Parse_db_conf.path;
-                 if Sys.file_exists most_recent_db.Parse_db_conf.path then
-                   Db_upgrade.maybe_upgrade most_recent_db;
+      (* Then look to see whether we have specific upgrade rules to consider *)
+      if Sys.file_exists db.Parse_db_conf.path then Db_upgrade.maybe_upgrade 
db;
        
-               end;
       post_populate_hook ()
 
     let spawn_db_flush_threads() =
diff -r 0baec4029c0a -r 9b07adfbf565 ocaml/database/db_upgrade.ml
--- a/ocaml/database/db_upgrade.ml      Tue Nov 03 15:51:10 2009 +0000
+++ b/ocaml/database/db_upgrade.ml      Tue Nov 03 15:51:12 2009 +0000
@@ -33,6 +33,30 @@
 *)
 
 module Names = Db_names
+
+(** {Release-specific custom database upgrade rules} *)
+
+(** The type of an upgrade rule. The rules should ideally be idempotent and 
composable.
+    All new fields will have been created with default values and new tables 
will exist. *)
+type upgrade_rule = {
+  description: string;
+  version: int * int; (** rule will be applied if the schema version is <= 
this number *)
+  fn: unit -> unit;
+}
+
+(** Apply all the rules needed for the previous_version *)
+let apply_upgrade_rules rules previous_version = 
+  debug "Looking for database upgrade rules:";
+  let required_rules = List.filter (fun r -> previous_version <= r.version) 
rules in
+  List.iter
+    (fun r ->
+       debug "Applying database upgrade rule: %s" r.description;
+       try
+        r.fn ()
+       with exn ->
+        error "Database upgrade rule '%s' failed: %s" r.description 
(Printexc.to_string exn)
+    ) required_rules
+  
 
 let (+++) = Int64.add
 
@@ -185,9 +209,7 @@
                debug "Using generic BIOS strings";
                update_vms Xapi_globs.generic_bios_strings
 
-(* !!! This fn is release specific: REMEMBER TO UPDATE IT AS WE MOVE TO NEW 
RELEASES *)
-let non_generic_db_upgrade_rules () =
-
+let update_snapshots () = 
        (* GEORGE -> MIDNIGHT RIDE *)
        let vm_table = lookup_table_in_cache Db_backend.cache Names.vm in
        let vm_rows = get_rowlist vm_table in
@@ -207,93 +229,64 @@
                                set_field_in_row s2 Names.parent 
(lookup_field_in_row s1 Names.ref);
                                aux (s2 :: t) in
                aux ordered_snapshot_rows in
-       List.iter update_snapshots vm_rows;
+       List.iter update_snapshots vm_rows
 
-       upgrade_vm_records (); (* for DMC *)
-       upgrade_bios_strings () (* GEORGE OEM -> BODIE/MNR *)   
+(** A list of all the custom database upgrade rules known to the system. *)
+let upgrade_rules = 
+  let george = Datamodel.george_release_schema_major_vsn, 
Datamodel.george_release_schema_minor_vsn in
+  [ { description = "Updating snapshot parent references";
+      version = george;
+      fn = update_snapshots };
+    { description = "Upgrading VM memory fields for DMC";
+      version = george;
+      fn = upgrade_vm_records };
+    { description = "Upgrading VM BIOS strings";
+      version = george;
+      fn = upgrade_bios_strings } ]
 
-let upgrade_from_last_release dbconn =
-  (* NB the database cache has been populated already *)
-  debug "Database schema version is that of last release: attempting upgrade";
+(** {Generic database upgrade handling} *)
 
-  (* !!! UPDATE THIS WHEN MOVING TO NEW RELEASE !!! *)
-  let old_release = Datamodel_types.rel_george in
-  let this_release = Datamodel_types.rel_midnight_ride in
-
-  let objs_in_last_release =
-    List.filter (fun x -> List.mem old_release 
x.Datamodel_types.obj_release.Datamodel_types.internal) Db_backend.api_objs in
-  let table_names_in_last_release =
-    List.map (fun x->Escaping.escape_obj x.Datamodel_types.name) 
objs_in_last_release in
-
-  let objs_in_this_release =
-    List.filter (fun x -> List.mem this_release 
x.Datamodel_types.obj_release.Datamodel_types.internal) Db_backend.api_objs in
-  let table_names_in_this_release =
-    List.map (fun x->Escaping.escape_obj x.Datamodel_types.name) 
objs_in_this_release in
-
-  let table_names_new_in_this_release =
-    List.filter (fun tblname -> not (List.mem tblname 
table_names_in_last_release)) table_names_in_this_release in
-
-  (* we also have to ensure that the in-memory cache contains the new tables 
added in this release that will not have been
-     created by the proceeding populate (cos this is restricted to table names 
in last release). Unless the new tables are
-     explicitly added to the in-memory cache they will not be written out into 
the new db file across upgrade. In the XML
-     backend there's no schema file from which tables are created, so this 
needs to be made explicit..
-  *)
-  let create_blank_table_in_cache tblname =
-    let newtbl = create_empty_table () in
-    set_table_in_cache Db_backend.cache tblname newtbl in
-  List.iter create_blank_table_in_cache table_names_new_in_this_release;
-
+(** Automatically insert blank tables and new columns with default values *)
+let generic_database_upgrade () =
+  let existing_table_names = fold_over_tables (fun name _ acc -> name :: acc) 
Db_backend.cache [] in
+  let api_table_names = List.map (fun x -> Escaping.escape_obj 
x.Datamodel_types.name) Db_backend.api_objs in
+  let created_table_names = Listext.List.set_difference api_table_names 
existing_table_names in
+  let deleted_table_names = Listext.List.set_difference existing_table_names 
api_table_names in
+  List.iter (fun tblname ->
+              debug "Adding new database table: '%s'" tblname;
+              let newtbl = create_empty_table () in
+              set_table_in_cache Db_backend.cache tblname newtbl) 
created_table_names;
+  List.iter (fun tblname ->
+              debug "Ignoring legacy database table: '%s'" tblname
+           ) deleted_table_names;
+  
   (* for each table, go through and fill in missing default values *)
-  let add_default_fields_to_tbl tblname =
-    let tbl = lookup_table_in_cache Db_backend.cache tblname in
-    let rows = get_rowlist tbl in
-    let add_fields_to_row objref r =
-      let kvs = fold_over_fields (fun k v env -> (k,v)::env) r [] in
-      let new_kvs = Db_backend.add_default_kvs kvs tblname in
-      (* now blank r and fill it with new kvs: *)
-      let newrow = create_empty_row () in
-      List.iter (fun (k,v) -> set_field_in_row newrow k v) new_kvs;
-      set_row_in_table tbl objref newrow
-    in
-    iter_over_rows add_fields_to_row tbl in
-
-  (* Go and fill in default values *)
-  List.iter add_default_fields_to_tbl table_names_in_last_release;
-  
-  non_generic_db_upgrade_rules();
-
-  (* Now do the upgrade: *)
-  (* 1. move existing db out of the way *)
-  Unix.rename dbconn.Parse_db_conf.path (dbconn.Parse_db_conf.path ^ 
".prev_version." ^ (string_of_float (Unix.gettimeofday())));
-  let dbconn_to_flush_to = Db_connections.preferred_write_db() in
-  (* 2. create a new empty db file (with current schema) *)
-  Db_connections.create_empty_db dbconn_to_flush_to;
-  (* 3. mark all tables we want to write data into as dirty, and all rows as 
new *)
   List.iter
-    (fun tname ->
-          Db_dirty.set_all_dirty_table_status tname;
-          let rows = get_rowlist (lookup_table_in_cache Db_backend.cache 
tname) in
-          let objrefs = List.map (fun row -> lookup_field_in_row row 
Db_backend.reference_fname) rows in
-          List.iter (fun objref->Db_dirty.set_all_row_dirty_status objref 
Db_dirty.New) objrefs
-    )
-    table_names_in_last_release;
-  debug "Database upgrade complete, restarting to use new db";
-  (* 4. flush and exit with restart return code, so watchdog kicks xapi off 
again (this time with upgraded db) *)
-  ignore (Db_connections.flush_dirty_and_maybe_exit dbconn_to_flush_to (Some 
Xapi_globs.restart_return_code))
-
-exception Schema_mismatch
+    (fun tblname ->
+       let tbl = lookup_table_in_cache Db_backend.cache tblname in
+       let rows = get_rowlist tbl in
+       let add_fields_to_row objref r =
+        let kvs = fold_over_fields (fun k v env -> (k,v)::env) r [] in
+        let new_kvs = Db_backend.add_default_kvs kvs tblname in
+        (* now blank r and fill it with new kvs: *)
+        let newrow = create_empty_row () in
+        List.iter (fun (k,v) -> set_field_in_row newrow k v) new_kvs;
+        set_row_in_table tbl objref newrow
+       in
+       iter_over_rows add_fields_to_row tbl) 
+    api_table_names
 
 (* Maybe upgrade most recent db *)
 let maybe_upgrade most_recent_db =
-  debug "Considering upgrade...";
-  let major_vsn, minor_vsn = Backend_xml.read_schema_vsn most_recent_db in
-  debug "Db has schema major_vsn=%d, minor_vsn=%d (current is %d %d) (last is 
%d %d)" major_vsn minor_vsn Datamodel.schema_major_vsn 
Datamodel.schema_minor_vsn Datamodel.last_release_schema_major_vsn 
Datamodel.last_release_schema_minor_vsn;
-  begin
-    if major_vsn=Datamodel.schema_major_vsn && 
minor_vsn=Datamodel.schema_minor_vsn then
-      () (* current vsn: do nothing *)
-    else if major_vsn=Datamodel.last_release_schema_major_vsn && 
minor_vsn=Datamodel.last_release_schema_minor_vsn then begin
-      upgrade_from_last_release most_recent_db
-      (* Note: redo log is not active at present because HA is always disabled 
before an upgrade. *)
-      (* If this ever becomes not the case, consider invalidating the redo-log 
here (using Redo_log.empty()). *)
-    end else raise Schema_mismatch
-  end
+  let (previous_major_vsn, previous_minor_vsn) as previous_vsn = 
Backend_xml.read_schema_vsn most_recent_db in
+  let (latest_major_vsn, latest_minor_vsn) as latest_vsn = 
Datamodel.schema_major_vsn, Datamodel.schema_minor_vsn in
+  let previous_string = Printf.sprintf "(%d, %d)" previous_major_vsn 
previous_minor_vsn in
+  let latest_string = Printf.sprintf "(%d, %d)" latest_major_vsn 
latest_minor_vsn in
+  debug "Database schema version is %s; binary schema version is %s" 
previous_string latest_string;
+  if previous_vsn > latest_vsn
+  then warn "Database schema version %s is more recent than binary %s: 
downgrade is unsupported." previous_string previous_string
+  else 
+    if previous_vsn < latest_vsn 
+    then apply_upgrade_rules upgrade_rules previous_vsn
+    else
+      debug "Database schemas match, no upgrade required"
diff -r 0baec4029c0a -r 9b07adfbf565 ocaml/idl/datamodel.ml
--- a/ocaml/idl/datamodel.ml    Tue Nov 03 15:51:10 2009 +0000
+++ b/ocaml/idl/datamodel.ml    Tue Nov 03 15:51:12 2009 +0000
@@ -30,9 +30,12 @@
 let orlando_release_schema_major_vsn = 5
 let orlando_release_schema_minor_vsn = 55
 
+let george_release_schema_major_vsn = 5
+let george_release_schema_minor_vsn = 57
+
 (* the schema vsn of the last release: used to determine whether we can 
upgrade or not.. *)
-let last_release_schema_major_vsn = 5
-let last_release_schema_minor_vsn = 57
+let last_release_schema_major_vsn = george_release_schema_major_vsn
+let last_release_schema_minor_vsn = george_release_schema_minor_vsn
 
 (** Bindings for currently specified releases *)
 
3 files changed, 103 insertions(+), 104 deletions(-)
ocaml/database/db_cache.ml   |   37 +++++----
ocaml/database/db_upgrade.ml |  163 ++++++++++++++++++++----------------------
ocaml/idl/datamodel.ml       |    7 +


Attachment: xen-api.hg-2.patch
Description: Text Data

_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.